Integration TAF2s-OEs and SRRM2-KDs

Alternative splicing analysis

Author

Niccolò Arecco

1 Intro

This report contains the code used to produce the panels of figure 6 of the paper including the re-analysis of the for SRRM2 Knockdown (KD) RNA-seq in HeLa(Zhang et al. 2024) (Bo Wang lab) and HepG2(Cui et al. 2023) (Ben Blewncowe, Alan Lambowitz & Paul Schimmel) cells.

2 Set Up

Last code execution: 2025 March 04, Tuesday @ 10:03:24.

2.1 Packages

Load packages required for the analysis and suppress any message. Check the Section 8 section at the end for package versions.

Code
suppressPackageStartupMessages( library(dplyr, warn.conflicts = F, quietly = T))
library(readr)
library(tidyr)
library(ggplot2)
library(ggforce)
library(patchwork)

suppressPackageStartupMessages( library(rstatix, quietly = TRUE) )
suppressPackageStartupMessages(library(GOfuncR))
suppressPackageStartupMessages(library(DMRichR, quietly = TRUE))
suppressPackageStartupMessages(library(org.Hs.eg.db, quietly = TRUE))
library(ggVennDiagram)

In addition I developed an R package called niar to stream line some of the common analysis steps. It can be installed from my GitHub repository using:

Code
devtools::install_github("Ni-Ar/niar")
library(niar)

2.2 Functions

This section contains custom-made functions used in this analysis.

Define the style of the bar plots.

Code
thm_bar <- theme_classic(base_family = 'Arial', base_size = 6) +
  theme(axis.title.y = element_blank(),
        axis.ticks.y = element_blank(), 
        axis.text = element_text(colour = 'black'),
        axis.ticks.length = unit(1, 'mm'),
        
        legend.key.size = unit(1, 'mm'),
        legend.key.height = unit(1, 'mm'),
        legend.key.width = unit(1, 'mm'),
        legend.margin = margin(0, 0, 0, 0),
        legend.spacing = unit(0, "cm"),
        legend.position = 'inside',
        legend.position.inside = c(0.95, 0.18),
        legend.title = element_blank(),
        
        plot.background = element_blank(),
        
        strip.background = element_blank(),
        
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_rect(colour = 'black', fill = 'NA', linewidth = 0.2),
        panel.spacing.x = unit(3, 'mm')
        )

Define the style of the scatter plots

Code
thm_scatter <- theme_classic(base_family = 'Arial', base_size = 7) +
  theme(axis.text = element_text(colour = 'black'),
        axis.line = element_line(colour = 'black', linewidth = 0.2),
        axis.ticks.length = unit(1, 'mm'),
        
        panel.grid.major = element_line(linewidth = 0.1),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        
        legend.key.size = unit(1, 'mm'),
        legend.key.height = unit(1, 'mm'),
        legend.key.width = unit(1, 'mm'),
        legend.margin = margin(0, 0, 0, 0),
        legend.spacing = unit(0, "cm"),
        
        plot.background = element_blank())

Helper functions for the GO terms analysis.

Code
run_GOfuncR_slim <- function(input_df) {
  
  res_hyper <- GOfuncR::go_enrich(input_df, n_randset = 100, silent = T, orgDb = 'org.Hs.eg.db')
  res_silm <- DMRichR::slimGO(GO = res_hyper, tool = 'GOfuncR', annoDb = org.Hs.eg.db )
  
  return(list(res_hyper, res_silm))
}

Helper function for MATT post-processing

Code
process_exon_feats <- function(data) {
  
  LEN_feats <- c('UPEXON_MEDIANLENGTH', 'UPINTRON_MEDIANLENGTH', 'EXON_LENGTH', 'DOINTRON_MEDIANLENGTH', 'DOEXON_MEDIANLENGTH')
  RATIO_LEN_feats <- c('RATIO_UPEXON_EXON_LENGTH', 'RATIO_UPINTRON_EXON_LENGTH', 'RATIO_DOINTRON_EXON_LENGTH', 'RATIO_DOEXON_EXON_LENGTH')
  GCC_feats <- c('UPEXON_GCC', 'UPINTRON_GCC', 'EXON_GCC', 'DOINTRON_GCC', 'DOEXON_GCC')
  RATIO_GCC_feats <- c("RATIO_UPEXON_EXON_GCC", "RATIO_UPINTRON_EXON_GCC", "RATIO_DOINTRON_EXON_GCC", "RATIO_DOEXON_EXON_GCC")
  
  BPS_feats <- c('NUM_PREDICTED_BPS_UPINTRON', 'NUM_PREDICTED_BPS_DOINTRON', 
                 'BPSCORE_MAXBP_UPINTRON', 'BPSCORE_MAXBP_DOINTRON', 
                 'MEDIAN_BPSCORE_UPINTRON', 'MEDIAN_BPSCORE_DOINTRON',
                 'MEDIAN_SCORE_FOR_BPSEQ_UPINTRON', 'MEDIAN_SCORE_FOR_BPSEQ_DOINTRON', 
                 'SF1_HIGHESTSCORE_3SS_UPINTRON', 'SF1_HIGHESTSCORE_3SS_DOINTRON')
  
  PYT_feats <- c('MEDIAN_POLYPYRITRAC_OFFSET_UPINTRON', 'MEDIAN_POLYPYRITRAC_LEN_UPINTRON', 'MEDIAN_POLYPYRITRAC_SCORE_UPINTRON',
                 'MEDIAN_POLYPYRITRAC_OFFSET_DOINTRON', 'MEDIAN_POLYPYRITRAC_LEN_DOINTRON', 'MEDIAN_POLYPYRITRAC_SCORE_DOINTRON')
  
  MAX_ENT_SCR_feats <- c('MAXENTSCR_HSAMODEL_UPSTRM_5SS', 'MAXENTSCR_HSAMODEL_3SS', 
                         'MAXENTSCR_HSAMODEL_5SS', 'MAXENTSCR_HSAMODEL_DOWNSTRM_3SS')
  
  NUM_feats <- c('MEDIAN_EXON_NUMBER', 'EXON_MEDIANRANK', 'EXON_MEDIANRELATIVERANK',
                 'EXON_MEDIANRELATIVERANK_3BINS', 'EXON_MEDIANRELATIVERANK_5BINS',
                 'EXON_MEDIANRELATIVERANK_10BINS')
  
  
  ex_feats <- c(LEN_feats, RATIO_LEN_feats, GCC_feats, RATIO_GCC_feats, 
                BPS_feats, PYT_feats, MAX_ENT_SCR_feats, NUM_feats)
  
  col_to_sel <- c('GROUP', 'EVENT', ex_feats)
  
  data |>
    dplyr::select( all_of(col_to_sel) ) |>
    pivot_longer(cols = ex_feats, names_to = 'EX_FEAT', values_to = 'VALUE') |>
    mutate(EX_FEAT = factor(EX_FEAT, levels = ex_feats) ) |>
    mutate( VALUE = case_when(EX_FEAT %in% c(LEN_feats, RATIO_LEN_feats) ~ log10(VALUE+1), 
                              !EX_FEAT %in% c(LEN_feats, RATIO_LEN_feats) ~ VALUE ) ) |>
    mutate(LOCATION = EX_FEAT ) |>
    mutate(LOCATION = gsub('_MEDIANLENGTH', '', LOCATION) ) |>
    mutate(LOCATION = gsub('NUM_PREDICTED_BPS_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('BPSCORE_MAXBP_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('MEDIAN_BPSCORE_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('MEDIAN_SCORE_FOR_BPSEQ_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('SF1_HIGHESTSCORE_3SS_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('MEDIAN_POLYPYRITRAC_OFFSET_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('MEDIAN_POLYPYRITRAC_LEN_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('MEDIAN_POLYPYRITRAC_SCORE_', '', LOCATION) ) |>
    
    mutate(LOCATION = gsub('MEDIAN_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('_NUMBER', '', LOCATION) ) |>
    mutate(LOCATION = gsub('_MEDIANRANK', '', LOCATION) ) |>
    mutate(LOCATION = gsub('_MEDIANRELATIVERANK.*$', '', LOCATION) ) |>
    mutate(LOCATION = gsub('_LENGTH', '', LOCATION) ) |>
    mutate(LOCATION = gsub('_EXON', '', LOCATION) ) |>
    mutate(LOCATION = gsub('_GCC', '', LOCATION) ) |>
    mutate(LOCATION = gsub('RATIO_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('_HSAMODEL', '', LOCATION) ) |>
    mutate(LOCATION = gsub('MAXENTSCR_UPSTRM_5SS', 'UPINTRON_5SS', LOCATION) ) |>
    mutate(LOCATION = gsub('MAXENTSCR_DOWNSTRM_3SS', 'DOINTRON_3SS', LOCATION) ) |>
    mutate(LOCATION = gsub('MAXENTSCR_3SS', 'EXON_3SS', LOCATION) ) |>
    mutate(LOCATION = gsub('MAXENTSCR_5SS', 'EXON_5SS', LOCATION) ) -> proc_data
  
  pretty_order <- unique(proc_data$LOCATION)
  
  tidy_data <- proc_data |>
    # calculate a delta mean
    group_by(EX_FEAT) |>
    summarise(MEAN_FEAT_ASNC = mean(VALUE[GROUP %in% 'AS_NC'], na.rm = T),
              MEAN_FEAT_CS = mean(VALUE[GROUP %in% 'CS'], na.rm = T),
              SD_FEAT_ASNC = sd(VALUE[GROUP %in% 'AS_NC'], na.rm = T),
              SD_FEAT_CS = sd(VALUE[GROUP %in% 'CS'], na.rm = T) ) |>
    left_join(x = proc_data, by = join_by(EX_FEAT) ) |>
    mutate(dF_ASNC = VALUE - MEAN_FEAT_ASNC,
           dF_CS = VALUE - MEAN_FEAT_CS ) |>
    mutate(zF_ASNC = dF_ASNC / SD_FEAT_ASNC,
           zF_CS = dF_CS / SD_FEAT_CS ) |>
    mutate(CATEGORY = case_when(EX_FEAT %in% LEN_feats ~ 'Length',
                                EX_FEAT %in% RATIO_LEN_feats ~ 'Ratio to Exon Length',
                                EX_FEAT %in% GCC_feats ~ 'GC %',
                                EX_FEAT %in% RATIO_GCC_feats ~ 'Ratio to Exon GC %',
                                EX_FEAT %in% BPS_feats ~ 'Branch Point',
                                EX_FEAT %in% PYT_feats ~ 'Poly Y Track',
                                EX_FEAT %in% NUM_feats ~ 'Rel. Position',
                                EX_FEAT %in% MAX_ENT_SCR_feats ~ 'S.S. Score')
    ) |>
    mutate(CATEGORY = factor(CATEGORY, 
                             levels = c('Length', 'Ratio to Exon Length', 'GC %',
                                        'Ratio to Exon GC %', 'Branch Point', 
                                        'Poly Y Track','S.S. Score', 'Rel. Position') ) ) |>
    mutate(LOCATION = factor(LOCATION, levels = pretty_order ) ) |>
    relocate(CATEGORY, .after = EVENT) |>
    relocate(LOCATION, .after = EX_FEAT) 

  return(tidy_data)
}

This is basically the function as above, but without the pivot_longer and a relocate.

Code
process_exon_feats_test <- function(data) {
  
  LEN_feats <- c('UPEXON_MEDIANLENGTH', 'UPINTRON_MEDIANLENGTH', 'EXON_LENGTH', 'DOINTRON_MEDIANLENGTH', 'DOEXON_MEDIANLENGTH')
  RATIO_LEN_feats <- c('RATIO_UPEXON_EXON_LENGTH', 'RATIO_UPINTRON_EXON_LENGTH', 'RATIO_DOINTRON_EXON_LENGTH', 'RATIO_DOEXON_EXON_LENGTH')
  GCC_feats <- c('UPEXON_GCC', 'UPINTRON_GCC', 'EXON_GCC', 'DOINTRON_GCC', 'DOEXON_GCC')
  RATIO_GCC_feats <- c("RATIO_UPEXON_EXON_GCC", "RATIO_UPINTRON_EXON_GCC", "RATIO_DOINTRON_EXON_GCC", "RATIO_DOEXON_EXON_GCC")
  
  BPS_feats <- c('NUM_PREDICTED_BPS_UPINTRON', 'NUM_PREDICTED_BPS_DOINTRON', 
                 'BPSCORE_MAXBP_UPINTRON', 'BPSCORE_MAXBP_DOINTRON', 
                 'MEDIAN_BPSCORE_UPINTRON', 'MEDIAN_BPSCORE_DOINTRON',
                 'MEDIAN_SCORE_FOR_BPSEQ_UPINTRON', 'MEDIAN_SCORE_FOR_BPSEQ_DOINTRON', 
                 'SF1_HIGHESTSCORE_3SS_UPINTRON', 'SF1_HIGHESTSCORE_3SS_DOINTRON')
  
  PYT_feats <- c('MEDIAN_POLYPYRITRAC_OFFSET_UPINTRON', 'MEDIAN_POLYPYRITRAC_LEN_UPINTRON', 'MEDIAN_POLYPYRITRAC_SCORE_UPINTRON',
                 'MEDIAN_POLYPYRITRAC_OFFSET_DOINTRON', 'MEDIAN_POLYPYRITRAC_LEN_DOINTRON', 'MEDIAN_POLYPYRITRAC_SCORE_DOINTRON')
  
  MAX_ENT_SCR_feats <- c('MAXENTSCR_HSAMODEL_UPSTRM_5SS', 'MAXENTSCR_HSAMODEL_3SS', 
                         'MAXENTSCR_HSAMODEL_5SS', 'MAXENTSCR_HSAMODEL_DOWNSTRM_3SS')
  
  NUM_feats <- c('MEDIAN_EXON_NUMBER', 'EXON_MEDIANRANK', 'EXON_MEDIANRELATIVERANK',
                 'EXON_MEDIANRELATIVERANK_3BINS', 'EXON_MEDIANRELATIVERANK_5BINS',
                 'EXON_MEDIANRELATIVERANK_10BINS')
  
  
  ex_feats <- c(LEN_feats, RATIO_LEN_feats, GCC_feats, RATIO_GCC_feats, 
                BPS_feats, PYT_feats, MAX_ENT_SCR_feats, NUM_feats)
  
  data |>
    mutate(EX_FEAT = factor(EX_FEAT, levels = ex_feats) ) |>
    mutate(LOCATION = EX_FEAT ) |>
    mutate(LOCATION = gsub('_MEDIANLENGTH', '', LOCATION) ) |>
    mutate(LOCATION = gsub('NUM_PREDICTED_BPS_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('BPSCORE_MAXBP_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('MEDIAN_BPSCORE_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('MEDIAN_SCORE_FOR_BPSEQ_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('SF1_HIGHESTSCORE_3SS_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('MEDIAN_POLYPYRITRAC_OFFSET_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('MEDIAN_POLYPYRITRAC_LEN_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('MEDIAN_POLYPYRITRAC_SCORE_', '', LOCATION) ) |>
    
    mutate(LOCATION = gsub('MEDIAN_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('_NUMBER', '', LOCATION) ) |>
    mutate(LOCATION = gsub('_MEDIANRANK', '', LOCATION) ) |>
    mutate(LOCATION = gsub('_MEDIANRELATIVERANK.*$', '', LOCATION) ) |>
    mutate(LOCATION = gsub('_LENGTH', '', LOCATION) ) |>
    mutate(LOCATION = gsub('_EXON', '', LOCATION) ) |>
    mutate(LOCATION = gsub('_GCC', '', LOCATION) ) |>
    mutate(LOCATION = gsub('RATIO_', '', LOCATION) ) |>
    mutate(LOCATION = gsub('_HSAMODEL', '', LOCATION) ) |>
    mutate(LOCATION = gsub('MAXENTSCR_UPSTRM_5SS', 'UPINTRON_5SS', LOCATION) ) |>
    mutate(LOCATION = gsub('MAXENTSCR_DOWNSTRM_3SS', 'DOINTRON_3SS', LOCATION) ) |>
    mutate(LOCATION = gsub('MAXENTSCR_3SS', 'EXON_3SS', LOCATION) ) |>
    mutate(LOCATION = gsub('MAXENTSCR_5SS', 'EXON_5SS', LOCATION) ) -> proc_data
  
  pretty_order <- unique(proc_data$LOCATION)
  
  tidy_data <- proc_data |>
    mutate(CATEGORY = case_when(EX_FEAT %in% LEN_feats ~ 'Length',
                                EX_FEAT %in% RATIO_LEN_feats ~ 'Ratio to Exon Length',
                                EX_FEAT %in% GCC_feats ~ 'GC %',
                                EX_FEAT %in% RATIO_GCC_feats ~ 'Ratio to Exon GC %',
                                EX_FEAT %in% BPS_feats ~ 'Branch Point',
                                EX_FEAT %in% PYT_feats ~ 'Poly Y Track',
                                EX_FEAT %in% NUM_feats ~ 'Rel. Position',
                                EX_FEAT %in% MAX_ENT_SCR_feats ~ 'S.S. Score')
    ) |>
    mutate(CATEGORY = factor(CATEGORY, 
                             levels = c('Length', 'Ratio to Exon Length', 'GC %',
                                        'Ratio to Exon GC %', 'Branch Point', 
                                        'Poly Y Track','S.S. Score', 'Rel. Position') ) ) |>
    mutate(LOCATION = factor(LOCATION, levels = pretty_order ) ) |>
    relocate(LOCATION, .after = EX_FEAT) 

  return(tidy_data)
}

2.3 Directories & File Paths

Here I organised all the file and directories paths I need to run the analysis and define where to save the processed tables and figures.

Code
if (local) {
    proj_dir <- file.path('~/mnt/narecco/projects/01_ALTdemix')
} else if (local == FALSE) {
    proj_dir <- file.path('~/projects/01_ALTdemix')
} else {
    stop('local is not a Logical')
}

expr_dir <- file.path(proj_dir, 'data/INCLUSION_tbl/Tanja')
vast_tools_dir <- file.path(expr_dir, 'vast_tools')
vast_out <- file.path(expr_dir, 'vast_tools/vast_out')

psi_path <- file.path(vast_out, 'INCLUSION_LEVELS_FULL-hg38-12-v251.tab')
expr_path <- file.path(vast_out, 'TPM-hg38-12-NORM.tab')

Path to the differentially spliced event, I identified.

Code
dse_dir_TAF2 <- file.path(expr_dir, 'diff_spliced_IDs/TAF2-OE')
dse_path_TAF2 <- file.path(dse_dir_TAF2, '0_HeLa_TAF2_Supplemtentary_Table_DSE_SHARED_UNIQUE.tab')

dse_dir_SRRM2 <- file.path(expr_dir, 'diff_spliced_IDs/SRRM2-KD')
dse_path_SRRM2_HeLa <- file.path(dse_dir_SRRM2, '0_HeLa_Supplemtentary_Table_DSE_SHARED_UNIQUE.tab')
dse_path_SRRM2_HepG2 <- file.path(dse_dir_SRRM2, '0_HepG2_Supplemtentary_Table_DSE_SHARED_UNIQUE.tab')
Code
cmpr_TAF2dIDR_BG_dir <- file.path(vast_out, 'compare_2023_08_01/min_dPSI15_min_range0_max_dPSI5/GO_gene_lists')
BG_IDs_path <- file.path(cmpr_TAF2dIDR_BG_dir, 'HeLa_NLSTAF2dIDR_vs_CNTRL_mrgd_noB3_pIR/BG-HeLa_NLSTAF2dIDR_vs_CNTRL_mrgd_noB3_pIR.txt')

Paths to MATT outputs

Code
TAF2_MATT_CNTLR_EXONS_path <- file.path(vast_tools_dir, 'CONTROL_EXONS_AS_CR_CS.tab')
TAF2_MATT_CNTLR_INTRONS_path <- file.path(vast_tools_dir, 'CONTROL_INTRONS_AS_CR_CS.tab')

SRRM2_MATT_CNTRLS_EXONS_path <- file.path(vast_tools_dir, 'CONTROL_EXONS_SRRM2-KD_AS_CR_CS.tab')
SRRM2_MATT_CNTRLS_INTRONS_path <- file.path(vast_tools_dir, 'CONTROL_INTRONS_SRRM2-KD_AS_CR_CS.tab')

Paths to MATT output.

Code
# TAF2
exon_feat_dir_TAF2 <- file.path(dse_dir_TAF2, 'EXONS_FEATURES_HeLa')
intron_feat_dir_TAF2 <- file.path(dse_dir_TAF2, 'INTRONS_FEATURES_HeLa')
exon_feat_path_TAF2 <- file.path(exon_feat_dir_TAF2, 'MATT_OUTPUT_EXONS_TAF2_HeLa_with_efeatures_NOSEQ.tab')
intron_feat_path_TAF2 <- file.path(intron_feat_dir_TAF2, 'MATT_OUTPUT_INTRONS_TAF2_HeLa_with_ifeatures_NOSEQ.tab')

# SRRM2
exon_feat_dir_SRRM2 <- file.path(dse_dir_SRRM2, 'EXONS_FEATURES_HeLa')
intron_feat_dir_SRRM2 <- file.path(dse_dir_SRRM2, 'INTRONS_FEATURES_HeLa')
exon_feat_path_SRRM2 <- file.path(exon_feat_dir_SRRM2, 'MATT_OUTPUT_EXONS_SRRM2_HeLa_with_efeatures_NOSEQ.tab')
intron_feat_path_SRRM2 <- file.path(intron_feat_dir_SRRM2, 'MATT_OUTPUT_EXONS_SRRM2_HeLa_with_efeatures_NOSEQ.tab')

I save the pdfs locally on my CRG OneDrive to have move them into Illustrator.

Code
plot_dir <- file.path(expr_dir, 'plots/MAIN')
dse_dir_Integration <- file.path(expr_dir, 'diff_spliced_IDs/MAIN')

tbl_dir_fig <- dse_dir_Integration
pdf_dir_fig <- plot_dir

if (!dir.exists(pdf_dir_fig)) { dir.create(pdf_dir_fig, recursive = T) }
if (!dir.exists(tbl_dir_fig)) { dir.create(tbl_dir_fig, recursive = T) }

2.4 Parameters

Set the ∆PSI threshold, the ∆PSI “squish” threshold in the correlation scatter plots.

Code
dPSI_squish <- 40
dPSI_thshld <- 15
Note

In some plots the points higher than |∆PSI| > 40 are squished and plotted at that value. This helps showing all points on the plot and keeps the data less dispersed.

Here I report the analysis code to reproduce the findings in figure 6.

3 Number of DSE

To display the number of differentially spliced events (DSE) I read the files I wrote in the previous analysis reports, integrating 3 different methods, and plot them as bar plots. This file contains all the shared and uniquely differentially spliced events between TAF2 and NLS-∆IDR-TAF2.

Code
DSE_TAF2 <- read_delim(file = dse_path_TAF2, delim = '\t', col_names = T, show_col_types = F)
  
num_DSE_TAF2 <- DSE_TAF2 |>
  select(EXPERIMENT, EVENT, AS_TYPE, DIRECTION) |>
  unique() |>
  group_by(EXPERIMENT,  AS_TYPE, DIRECTION) |>
  summarise(Num_DSE = n(), .groups = 'keep') |>
  mutate(AS_TYPE = str_replace(pattern = 'Acceptor', replacement = "3' ss", AS_TYPE)) |>
  mutate(AS_TYPE = str_replace(pattern = 'Donor', replacement = "5' ss", AS_TYPE)) |>
  mutate(CELL_TYPE = 'HeLa')

# add fake data so that the bars in the plot have all the same width
filler_data <- data.frame(EXPERIMENT = rep(c('TAF2', 'TAF2∆IDR'), 2), 
                          AS_TYPE = c( rep("Alt. 3' ss", 2), rep("Alt. 5' ss", 2) ), 
                          CELL_TYPE = 'HeLa',
                          DIRECTION = 'DOWN', Num_DSE = -1)
  
num_DSE_TAF2 <- rbind(num_DSE_TAF2, filler_data)

Import SRRM2 Knockdown events in HeLa cells.

Code
DSE_SRRM2 <- read_delim(file = dse_path_SRRM2_HeLa,
                        delim = '\t', col_names = T, show_col_types = F)

num_DSE_SRRM2_HeLa <- DSE_SRRM2 |>
  select(EXPERIMENT, EVENT, AS_TYPE, DIRECTION) |>
  unique() |>
  group_by(EXPERIMENT,  AS_TYPE, DIRECTION) |>
  summarise(Num_DSE = n(), .groups = 'keep') |>
  mutate(EXPERIMENT = str_remove(pattern = "^HeLa-", EXPERIMENT)) |>
  mutate(AS_TYPE = str_replace(pattern = 'Acceptor', replacement = "3' ss", AS_TYPE)) |>
  mutate(AS_TYPE = str_replace(pattern = 'Donor', replacement = "5' ss", AS_TYPE)) |>
  mutate(CELL_TYPE = 'HeLa')

# add fake data so that the bars in the plot have all the same width
filler_data <- data.frame(EXPERIMENT = rep(c('SRRM2-KD'), 2), 
                          AS_TYPE = c( rep("Alt. 3' ss", 2), rep("Alt. 5' ss", 2) ), 
                          CELL_TYPE = 'HeLa',
                          DIRECTION = 'DOWN', Num_DSE = -1)
  
num_DSE_SRRM2_HeLa <- rbind(num_DSE_SRRM2_HeLa, filler_data) 

For completeness I also import the differentially spliced events upon SRRM2 KD in HepG2.

Code
DSE_SRRM2_HepG2 <- read_delim(file = dse_path_SRRM2_HepG2,
                        delim = '\t', col_names = T, show_col_types = F)

num_DSE_SRRM2_HepG2 <- DSE_SRRM2_HepG2 |>
  select(EXPERIMENT, EVENT, AS_TYPE, DIRECTION) |>
  unique() |>
  group_by(EXPERIMENT, AS_TYPE, DIRECTION) |>
  summarise(Num_DSE = n(), .groups = 'keep') |>
  mutate(EXPERIMENT = str_remove(pattern = "^HepG2-", EXPERIMENT)) |>
  mutate(AS_TYPE = str_replace(pattern = 'Acceptor', replacement = "3' ss", AS_TYPE)) |>
  mutate(AS_TYPE = str_replace(pattern = 'Donor', replacement = "5' ss", AS_TYPE)) |>
  mutate(CELL_TYPE = 'HepG2')

# add fake data so that the bars in the plot have all the same width
filler_data <- data.frame(EXPERIMENT = rep(c('SRRM2-KD'), 2), 
                          AS_TYPE = c( rep("Alt. 3' ss", 2), rep("Alt. 5' ss", 2) ), 
                          CELL_TYPE = 'HepG2',
                          DIRECTION = 'DOWN', Num_DSE = -1)
  
num_DSE_SRRM2_HepG2 <- rbind(num_DSE_SRRM2_HepG2, filler_data) 

Combine all the data into one data frame.

Code
Summary_events_HeLa <- rbind(num_DSE_TAF2, num_DSE_SRRM2_HeLa) |>
  mutate(EXPERIMENT = factor(EXPERIMENT, levels = c("TAF2", "TAF2∆IDR", "SRRM2-KD"))) |>
  mutate(AS_TYPE = factor(AS_TYPE, levels = rev(c("Exon", "Intron", "Alt. 3' ss", "Alt. 5' ss" ))))

Plot the numbers with bars. Figure 6C

Code
ggplot(Summary_events_HeLa) +
  aes( x = Num_DSE, y = AS_TYPE, fill = DIRECTION) +
  facet_wrap( ~EXPERIMENT, nrow = 1) +
  geom_col(position = position_dodge(width = 0.7), width = 0.6, linewidth = 0.2 ) +
  geom_text(aes(label = Num_DSE), position = position_dodge(width = 0.7), hjust = 1.05, colour = 'white', size = 1 ) + 
  scale_fill_manual(values = c('UP' = 'firebrick2', 'DOWN' = 'dodgerblue2', 'BOTH' = 'purple4') ) +
  scale_x_continuous(expand = expansion(mult = c(0, 0.05), add = 0), 
                     breaks = c(0, 150, 300, 450),
                     limits = c(0, NA)) +
  scale_y_discrete(expand = expansion(mult = 0.05, add = 0) ) +
  labs(x = 'Number of AS events') +
  thm_bar

Save to pdf.

Code
ggsave(filename = paste0('Fig_6C_Num_SHARED_UNIQUELY_DSE_dPSI', dPSI_thshld, '_Bar.pdf'), plot = last_plot(), 
       device = cairo_pdf, path = pdf_dir_fig, units = 'cm', width = 7,
       height = 4.0)

Also plot the data with the HepG2 SRRM2 KD

Combine all the data into one data frame.

Code
Summary_events <- rbind(Summary_events_HeLa, num_DSE_SRRM2_HepG2) |>
  mutate(EXPERIMENT = factor(EXPERIMENT, levels = c("TAF2", "TAF2∆IDR", "SRRM2-KD"))) |>
  mutate(AS_TYPE = factor(AS_TYPE, levels = rev(c("Exon", "Intron", "Alt. 3' ss", "Alt. 5' ss" ))))
Code
ggplot(Summary_events) +
  aes( x = Num_DSE, y = AS_TYPE, fill = DIRECTION) +
  facet_wrap( ~EXPERIMENT+CELL_TYPE, nrow = 1) +
  geom_col(position = position_dodge(width = 0.7), width = 0.6, linewidth = 0.2 ) +
  geom_text(aes(label = Num_DSE), position = position_dodge(width = 0.7), hjust = 1.05, colour = 'white', size = 1 ) + 
  scale_fill_manual(values = c('UP' = 'firebrick2', 'DOWN' = 'dodgerblue2', 'BOTH' = 'purple4') ) +
  scale_x_continuous(expand = expansion(mult = c(0, 0.05), add = 0), 
                     breaks = c(0, 150, 300, 450, 600, 750),
                     limits = c(0, NA)) +
  scale_y_discrete(expand = expansion(mult = 0.05, add = 0) ) +
  labs(x = 'Number of AS events') +
  thm_bar +
  theme(panel.spacing = unit(0.1, "lines"),
        strip.text = element_text(margin = margin(t = 0, b = 0)))

Code
ggsave(filename = paste0('Fig_Extra_Num_SHARED_UNIQUELY_DSE_dPSI', dPSI_thshld, '_Bar.pdf'), plot = last_plot(), 
       device = cairo_pdf, path = pdf_dir_fig, units = 'cm', width = 10,
       height = 4)

Prepare a supplementary table.

Code
rename_DSE_TAF2 <- DSE_TAF2
rename_DSE_TAF2[rename_DSE_TAF2$EXPERIMENT == 'TAF2∆IDR', ]$EXPERIMENT <- 'TAF2dIDR'

write_delim(x = rename_DSE_TAF2, file = file.path(tbl_dir_fig, 'Supplementary_Table_TAF2_AS_Results.tsv'), 
            delim = '\t', col_names = T, append = F, quote = 'none')

Export also SRRM2 data to file.

Code
write_delim(x = DSE_SRRM2, file = file.path(tbl_dir_fig, 'Supplementary_Table_SRRM2_AS_Results.tsv'), 
            delim = '\t', col_names = T, append = F, quote = 'none')

Combine the two tables in Excel outside of R.

4 Venn diagramms

Calculate overlap between experiments by extracting the alternatively spliced events ID and measure the intersection between all experiments.

Code
sel_dPSI_TAF2_HeLa <- subset(DSE_TAF2, EXPERIMENT == 'TAF2') |>
  select( c(GENE, AS_TYPE, EVENT, dPSI_TAF2 ) ) |> 
  unique() |>
  mutate(AS_TYPE = str_replace(pattern = 'Acceptor', replacement = "3' ss", AS_TYPE)) |>
  mutate(AS_TYPE = str_replace(pattern = 'Donor', replacement = "5' ss", AS_TYPE)) |>
  mutate(AS_TYPE = factor(AS_TYPE, levels = c("Exon", "Intron","Alt. 3' ss", "Alt. 5' ss") ) )

sel_dPSI_TAF2dIDR_HeLa <- subset(DSE_TAF2, EXPERIMENT == 'TAF2∆IDR' ) |> 
  select( c(GENE, AS_TYPE, EVENT, dPSI_TAF2dIDR ) ) |>
  unique() |>
  mutate(AS_TYPE = str_replace(pattern = 'Acceptor', replacement = "3' ss", AS_TYPE)) |>
  mutate(AS_TYPE = str_replace(pattern = 'Donor', replacement = "5' ss", AS_TYPE)) |>
  mutate(AS_TYPE = factor(AS_TYPE, levels = c("Exon", "Intron","Alt. 3' ss", "Alt. 5' ss") ) )

sel_dPSI_SRRM2_HeLa <- DSE_SRRM2 |>
  select( c(GENE, AS_TYPE, EVENT, dPSI_SRRM2_HeLa ) ) |>
  unique() |>
  mutate(AS_TYPE = str_replace(pattern = 'Acceptor', replacement = "3' ss", AS_TYPE)) |>
  mutate(AS_TYPE = str_replace(pattern = 'Donor', replacement = "5' ss", AS_TYPE)) |>
  mutate(AS_TYPE = factor(AS_TYPE, levels = c("Exon", "Intron","Alt. 3' ss", "Alt. 5' ss") ) )

sel_dPSI_SRRM2_HepG2 <- DSE_SRRM2_HepG2 |>
  select( c(GENE, AS_TYPE, EVENT, dPSI_SRRM2_HepG2 ) ) |>
  unique() |>
  mutate(AS_TYPE = str_replace(pattern = 'Acceptor', replacement = "3' ss", AS_TYPE)) |>
  mutate(AS_TYPE = str_replace(pattern = 'Donor', replacement = "5' ss", AS_TYPE)) |>
  mutate(AS_TYPE = factor(AS_TYPE, levels = c("Exon", "Intron","Alt. 3' ss", "Alt. 5' ss") ) )

ID_TAF2 <- sel_dPSI_TAF2_HeLa$EVENT
stopifnot(length(ID_TAF2) == nrow(sel_dPSI_TAF2_HeLa))

ID_TAF2dIDR <- sel_dPSI_TAF2dIDR_HeLa$EVENT
stopifnot(length(ID_TAF2dIDR) == nrow(sel_dPSI_TAF2dIDR_HeLa))

ID_SRRM2 <- sel_dPSI_SRRM2_HeLa$EVENT
stopifnot(length(ID_SRRM2) == nrow(sel_dPSI_SRRM2_HeLa))

ID_SRRM2_HepG2 <- sel_dPSI_SRRM2_HepG2$EVENT
stopifnot(length(ID_SRRM2_HepG2) == nrow(sel_dPSI_SRRM2_HepG2))

Check numbers of events that are differentially expressed in all datasets.

Code
message("Intersect TAF2 and ∆IDR: ", length(intersect(ID_TAF2, ID_TAF2dIDR)))
Intersect TAF2 and ∆IDR: 305
Code
message("Intersect TAF2 and SRRM2: ", length(intersect(ID_TAF2, ID_SRRM2)))
Intersect TAF2 and SRRM2: 41
Code
message("Intersect ∆IDR and SRRM2: ", length(intersect(ID_TAF2dIDR, ID_SRRM2)))
Intersect ∆IDR and SRRM2: 37
Code
message("Intersect TAF2, ∆IDR and SRRM2: ", length( intersect(intersect(ID_TAF2, ID_TAF2dIDR), ID_SRRM2)) )
Intersect TAF2, ∆IDR and SRRM2: 13
Important

There’s little overlap between HeLa and HepG2 knockdown experiments.

Code
message("Intersect HeLa-SRRM2-KD and HepG2-SRRM2-KD: ", length(intersect(ID_SRRM2, ID_SRRM2_HepG2)))
Intersect HeLa-SRRM2-KD and HepG2-SRRM2-KD: 55

Generate and save to pdf the Venn diagrams.

TAF2 & NLS-TAF2-∆IDR in HeLa.

Code
ggVennDiagram(list("TAF2" = ID_TAF2, "∆IDR" = ID_TAF2dIDR) ) +
  scale_fill_distiller(palette = "Paired", direction = 1) + 
  coord_flip() +
  theme(legend.position = 'none') -> p_VD_TAF2_dIDR

p_VD_TAF2_dIDR

Code
ggsave(filename = paste0('Fig_6D_TAF2_dIDR_ALL_DSE_dPSI', dPSI_thshld, '_VD.pdf'), 
       plot = p_VD_TAF2_dIDR, 
       device = cairo_pdf, path = pdf_dir_fig, units = 'cm', width = 10.16,
       height = 7.62)

TAF2 & SRRM2-KD in HeLa.

Code
ggVennDiagram(list("TAF2" = ID_TAF2, "SRRM2-KD" = ID_SRRM2) ) +
  scale_fill_distiller(palette = "Dark2", direction = 1) + 
  coord_flip() +
  theme(legend.position = 'none') -> p_VD_TAF2_SRRM2

p_VD_TAF2_SRRM2

Code
ggsave(filename = paste0('Fig_6D_TAF2_SRRM2_ALL_DSE_dPSI', dPSI_thshld, '_VD.pdf'), 
       plot = p_VD_TAF2_SRRM2, 
       device = cairo_pdf, path = pdf_dir_fig, units = 'cm', width = 10.16,
       height = 7.62)

NLS-TAF2-∆IDR & SRRM2-KD in HeLa.

Code
ggVennDiagram(list("∆IDR" = ID_TAF2dIDR, "SRRM2-KD" = ID_SRRM2) ) +
  scale_fill_distiller(palette = "Accent", direction = 1) + 
  coord_flip() +
  theme(legend.position = 'none') -> p_VD_dIDR_SRRM2

p_VD_dIDR_SRRM2

Code
ggsave(filename = paste0('Fig_6D_dIDR_SRRM2_ALL_DSE_dPSI', dPSI_thshld, '_VD.pdf'), 
       plot = p_VD_dIDR_SRRM2, 
       device = cairo_pdf, path = pdf_dir_fig, units = 'cm', width = 10.16,
       height = 7.62)

SRRM2-KD in HeLa & SRRM2-KD in HepG2.

Code
ggVennDiagram(list("SRRM2-KD-HeLa" = ID_SRRM2, "SRRM2-KD-HepG2" = ID_SRRM2_HepG2) ) +
  scale_fill_distiller(palette = "Spectral", direction = 1) + 
  coord_flip() +
  theme(legend.position = 'none') -> p_VD_SRRM2_HeLa_HepG2

p_VD_SRRM2_HeLa_HepG2

Code
ggsave(filename = paste0('Fig_Extra_HeLa_vs_HepG2_SRRM2-KD_ALL_DSE_dPSI', dPSI_thshld, '_VD.pdf'), 
       plot = p_VD_SRRM2_HeLa_HepG2, 
       device = cairo_pdf, path = pdf_dir_fig, units = 'cm', width = 10.16,
       height = 7.62)

5 ∆PSI correlation scatter plots

Check those that are positively correlating.

Code
inner_join(x = sel_dPSI_TAF2_HeLa, y = sel_dPSI_TAF2dIDR_HeLa, by = join_by(GENE, EVENT, AS_TYPE) ) |>
  mutate(Quadrant = 
         case_when(dPSI_TAF2 >= 0 &  dPSI_TAF2dIDR >= 0 ~ "I", 
                   dPSI_TAF2 < 0 &  dPSI_TAF2dIDR >= 0 ~ "II",
                   dPSI_TAF2 < 0 &  dPSI_TAF2dIDR < 0 ~ "III",
                   dPSI_TAF2 >= 0 &  dPSI_TAF2dIDR < 0 ~ "IV")) -> DSE_COR_TAF2_dIDR
  
DSE_POS_COR_TAF2_dIDR <- subset(DSE_COR_TAF2_dIDR, Quadrant %in% c('I', 'III'))
Code
message('Positively correlating DSE between TAF2 and ∆IDR: ', nrow(DSE_POS_COR_TAF2_dIDR))
Positively correlating DSE between TAF2 and ∆IDR: 303
Code
ggplot(DSE_COR_TAF2_dIDR) +
  aes(x = dPSI_TAF2, y = dPSI_TAF2dIDR, fill = AS_TYPE) +
  geom_vline(xintercept = 0, colour = 'black', linewidth = 0.2) +
  geom_hline(yintercept = 0, colour = 'black', linewidth = 0.2) +
  geom_point(shape = 21, size = 2, alpha = 1, show.legend = T, stroke = 0.2) +
  geom_text_repel(aes(label = GENE), show.legend = F, size = 2, family = 'Arial') +
  scale_x_continuous(n.breaks = 10, oob = scales::squish, limits = c(-dPSI_squish, dPSI_squish),
                     expand = expansion(mult = 0, add = 0.5)) +
  scale_y_continuous(n.breaks = 10, oob = scales::squish, limits = c(-dPSI_squish, dPSI_squish),
                     expand = expansion(mult = 0, add = 0.5) ) +
  scale_fill_manual(values = c('Exon' = 'dodgerblue', 'Intron' = 'darkgreen',
                               "Alt. 3' ss" = "coral", "Alt. 5' ss" = "coral3"),
                      name = "AS type") +
  coord_fixed(ratio = 1, clip = 'off') +
  labs(x = "HeLa \u0394PSI (TAF2 OE - Cntrl)", y = "HeLa \u0394PSI (NLS-TAF2 \u0394IDR OE - Cntrl)", 
        caption = "") +
  thm_scatter +
  theme(legend.position = 'inside',
        legend.position.inside = c(0.8, 0.2) ) -> p_dPSI_TAF2_dIDR
p_dPSI_TAF2_dIDR

Code
inner_join(x = sel_dPSI_TAF2_HeLa, y = sel_dPSI_SRRM2_HeLa, by = join_by(GENE, EVENT, AS_TYPE) ) |>
  mutate(Quadrant = 
         case_when(dPSI_TAF2 >= 0 &  dPSI_SRRM2_HeLa >= 0 ~ "I", 
                   dPSI_TAF2 < 0 &  dPSI_SRRM2_HeLa >= 0 ~ "II",
                   dPSI_TAF2 < 0 &  dPSI_SRRM2_HeLa < 0 ~ "III",
                   dPSI_TAF2 >= 0 &  dPSI_SRRM2_HeLa < 0 ~ "IV")) |>
  arrange(desc(AS_TYPE)) -> DSE_COR_TAF2_SRRM2
  
# table(DSE_COR_TAF2_SRRM2$Quadrant)
DSE_POS_COR_TAF2_SRRM2 <- subset(DSE_COR_TAF2_SRRM2, Quadrant %in% c('I', 'III'))
Code
message('Positively correlating DSE between TAF2 and SRRM2: ', nrow(DSE_POS_COR_TAF2_SRRM2))
Positively correlating DSE between TAF2 and SRRM2: 30
Code
ggplot(DSE_COR_TAF2_SRRM2) +
  aes(x = dPSI_TAF2, y = dPSI_SRRM2_HeLa, fill = AS_TYPE) +
  geom_vline(xintercept = 0, colour = 'black', linewidth = 0.2) +
  geom_hline(yintercept = 0, colour = 'black', linewidth = 0.2) +
  geom_point(shape = 21, size = 2, alpha = 1, show.legend = T, stroke = 0.2) +
  geom_text_repel(aes(label = GENE), show.legend = F, size = 2, family = 'Arial') +
  scale_x_continuous(n.breaks = 10, oob = scales::squish, limits = c(-dPSI_squish, dPSI_squish),
                     expand = expansion(mult = 0, add = 0.5)) +
  scale_y_continuous(n.breaks = 10, oob = scales::squish, limits = c(-dPSI_squish, dPSI_squish),
                     expand = expansion(mult = 0, add = 0.5) ) +
  scale_fill_manual(values = c('Exon' = 'dodgerblue', 'Intron' = 'darkgreen',
                               "Alt. 3' ss" = "coral", "Alt. 5' ss" = "coral3"),
                      name = "AS type") +
  coord_fixed(ratio = 1, clip = 'off') +
  labs(x = "HeLa \u0394PSI (TAF2 OE - Cntrl)", y = "HeLa \u0394PSI (SRRM2 KD - Cntrl)", 
        caption = "") +
  thm_scatter +
  theme(legend.position = 'inside',
        legend.position.inside = c(0.95, 0.05),
        legend.title = element_blank() ) -> p_dPSI_TAF2_SRRM2
p_dPSI_TAF2_SRRM2

Code
inner_join(x = sel_dPSI_TAF2dIDR_HeLa, y = sel_dPSI_SRRM2_HeLa, by = join_by(GENE, EVENT, AS_TYPE) ) |>
  mutate(Quadrant = 
         case_when(dPSI_TAF2dIDR >= 0 &  dPSI_SRRM2_HeLa >= 0 ~ "I", 
                   dPSI_TAF2dIDR < 0 &  dPSI_SRRM2_HeLa >= 0 ~ "II",
                   dPSI_TAF2dIDR < 0 &  dPSI_SRRM2_HeLa < 0 ~ "III",
                   dPSI_TAF2dIDR >= 0 &  dPSI_SRRM2_HeLa < 0 ~ "IV")) |>
   arrange(desc(AS_TYPE)) -> DSE_COR_dIDR_SRRM2
  
# table(DSE_COR_dIDR_SRRM2$Quadrant)
DSE_POS_COR_dIDR_SRRM2 <- subset(DSE_COR_dIDR_SRRM2, Quadrant %in% c('I', 'III'))
Code
message('Positively correlating DSE between ∆IDR and SRRM2: ', nrow(DSE_POS_COR_dIDR_SRRM2))
Positively correlating DSE between ∆IDR and SRRM2: 28
Code
ggplot(DSE_COR_dIDR_SRRM2) +
  aes(x = dPSI_TAF2dIDR, y = dPSI_SRRM2_HeLa, fill = AS_TYPE) +
  geom_vline(xintercept = 0, colour = 'black', linewidth = 0.2) +
  geom_hline(yintercept = 0, colour = 'black', linewidth = 0.2) +
  geom_point(shape = 21, size = 2, alpha = 1, show.legend = T, stroke = 0.2) +
  geom_text_repel(aes(label = GENE), show.legend = F, size = 2, family = 'Arial') +
  scale_x_continuous(n.breaks = 10, oob = scales::squish, limits = c(-dPSI_squish, dPSI_squish),
                     expand = expansion(mult = 0, add = 0.5)) +
  scale_y_continuous(n.breaks = 10, oob = scales::squish, limits = c(-dPSI_squish, dPSI_squish),
                     expand = expansion(mult = 0, add = 0.5) ) +
  scale_fill_manual(values = c('Exon' = 'dodgerblue', 'Intron' = 'darkgreen',
                               "Alt. 3' ss" = "coral", "Alt. 5' ss" = "coral3"),
                      name = "AS type") +
  coord_fixed(ratio = 1, clip = 'off') +
  labs(x = "HeLa \u0394PSI (NLS-TAF2 \u0394IDR OE - Cntrl)", y = "HeLa \u0394PSI (SRRM2 KD - Cntrl)", 
        caption = "") +
  thm_scatter +
  theme(legend.position = 'inside',
        legend.position.inside = c(0.95, 0.05),
        legend.title = element_blank() ) -> p_dPSI_dIDR_SRRM2
p_dPSI_dIDR_SRRM2

Code
inner_join(x = sel_dPSI_SRRM2_HeLa, y = sel_dPSI_SRRM2_HepG2, by = join_by(GENE, EVENT, AS_TYPE) ) |>
  mutate(Quadrant = 
         case_when(dPSI_SRRM2_HeLa >= 0 & dPSI_SRRM2_HepG2 >= 0 ~ "I", 
                   dPSI_SRRM2_HeLa < 0 &  dPSI_SRRM2_HepG2 >= 0 ~ "II",
                   dPSI_SRRM2_HeLa < 0 &  dPSI_SRRM2_HepG2 < 0 ~ "III",
                   dPSI_SRRM2_HeLa >= 0 & dPSI_SRRM2_HepG2 < 0 ~ "IV")) |>
   arrange(desc(AS_TYPE)) -> DSE_COR_SRRM2_HeLa_HepG2
  
DSE_POS_COR_SRRM2_HeLa_HepG2 <- subset(DSE_COR_SRRM2_HeLa_HepG2, Quadrant %in% c('I', 'III'))
Code
message('Positively correlating DSE between ∆IDR and SRRM2: ', nrow(DSE_POS_COR_SRRM2_HeLa_HepG2))
Positively correlating DSE between ∆IDR and SRRM2: 28
Code
ggplot(DSE_COR_SRRM2_HeLa_HepG2) +
  aes(x = dPSI_SRRM2_HepG2, y = dPSI_SRRM2_HeLa, fill = AS_TYPE) +
  geom_vline(xintercept = 0, colour = 'black', linewidth = 0.2) +
  geom_hline(yintercept = 0, colour = 'black', linewidth = 0.2) +
  geom_point(shape = 21, size = 2, alpha = 1, show.legend = T, stroke = 0.2) +
  geom_text_repel(aes(label = GENE), show.legend = F, size = 2, family = 'Arial') +
  scale_x_continuous(n.breaks = 10, oob = scales::squish, limits = c(-dPSI_squish, dPSI_squish),
                     expand = expansion(mult = 0, add = 0.5)) +
  scale_y_continuous(n.breaks = 10, oob = scales::squish, limits = c(-dPSI_squish, dPSI_squish),
                     expand = expansion(mult = 0, add = 0.5) ) +
  scale_fill_manual(values = c('Exon' = 'dodgerblue', 'Intron' = 'darkgreen',
                               "Alt. 3' ss" = "coral", "Alt. 5' ss" = "coral3"),
                      name = "AS type") +
  coord_fixed(ratio = 1, clip = 'off') +
  labs(x = "HepG2 \u0394PSI (SRRM2 KD - Cntrl)", y = "HeLa \u0394PSI (SRRM2 KD - Cntrl)", 
        caption = "") +
  thm_scatter +
  theme(legend.position = 'inside',
        legend.position.inside = c(0.95, 0.05),
        legend.title = element_blank() ) -> p_dPSI_SRRM2_HeLa_HepG2
p_dPSI_SRRM2_HeLa_HepG2

Explain that the events with a ∆PSI lower than |15| are there because they are found differentially spliced by the diff method.

Code
wrap_plots(p_dPSI_TAF2_dIDR, p_dPSI_TAF2_SRRM2, p_dPSI_dIDR_SRRM2, nrow = 1) + 
  plot_layout(guides = "collect")

Code
wrap_plots(p_dPSI_TAF2_SRRM2, p_dPSI_dIDR_SRRM2, nrow = 1, tag_level = 'new') + 
  plot_layout(guides = "collect") + 
  plot_annotation(tag_levels = 'A') -> fig_6f_g
fig_6f_g

Fix the legend in illustrator later.

Code
ggsave(filename = paste0('Fig_6FG_CORR_DSE_dPSI_Scatter.pdf'), plot = fig_6f_g, 
       device = cairo_pdf, path = pdf_dir_fig, units = 'cm', width = 12,
       height = 5.5)

Plot all SRRM2 correlations

Code
wrap_plots(p_dPSI_TAF2_SRRM2, p_dPSI_dIDR_SRRM2, p_dPSI_SRRM2_HeLa_HepG2, 
           nrow = 1, tag_level = 'new') + 
  plot_layout(guides = "collect") + 
  plot_annotation(tag_levels = 'A') -> fig_SRRM2
fig_SRRM2

Code
ggsave(filename = paste0('Fig_Extra_CORR_DSE_dPSI_Scatter.pdf'), plot = fig_SRRM2, 
       device = cairo_pdf, path = pdf_dir_fig, units = 'cm', width = 30,
       height = 11)

6 GO terms

Code
BG_genes <- read_delim(BG_IDs_path, delim = '\t', col_names = 'ensembl_gene_id', show_col_types = F)
BG_genes <- grep(pattern = '^ENSG', x = BG_genes$ensembl_gene_id, value = T) 

Map the ensembl gene ids to gene names:

Code
ensembl_hsa <- gimme_mart(version = 112)
Creating Mart object:
version: 112
released in: May 2024
dataset: hsapiens_gene_ensembl
genome assembly: GRCh38.p14
Code
anno_df <- ensembl_id_2_gene_name(ensembl_gene_id = BG_genes,
                                  only_gene_name = F,
                                  verbose = T, mRt_objct = ensembl_hsa)
Dataset: hsapiens_gene_ensembl
ENSEMBL host: https://may2024.archive.ensembl.org:443/biomart/martservice?redirect=no

Get gene names.

Code
BG_GENEs <- anno_df$external_gene_name
BG_GENEs <- BG_GENEs[!is.na(BG_GENEs)]
BG_GENEs <- BG_GENEs[BG_GENEs != '' ]
message('Background gene names: ', length(BG_GENEs) )
Background gene names: 11714

Get the foreground genes of the differentially spliced exons and introns.

Code
TAF2_EX_UPDO_GENE <- subset(DSE_TAF2, EXPERIMENT == 'TAF2' & DIRECTION %in% c("UP", "DOWN") & AS_TYPE == "Exon") |>
  subset(!is.na(GENE)) |> pull(GENE) |> unique()

TAF2_IN_UPDO_GENE <- subset(DSE_TAF2, EXPERIMENT == 'TAF2' & DIRECTION %in% c("UP", "DOWN") & AS_TYPE == "Intron") |>
  subset(!is.na(GENE)) |> pull(GENE) |> unique()

message("Genes with differentially spliced exons in TAF2: ", length(TAF2_EX_UPDO_GENE))
Genes with differentially spliced exons in TAF2: 436
Code
message("Genes with differentially spliced introns in TAF2: ", length(TAF2_IN_UPDO_GENE))
Genes with differentially spliced introns in TAF2: 193
Code
dIDR_EX_UPDO_GENE <- subset(DSE_TAF2, EXPERIMENT == 'TAF2∆IDR' & DIRECTION %in% c("UP", "DOWN") & AS_TYPE == "Exon") |>
  subset(!is.na(GENE)) |> pull(GENE) |> unique()

dIDR_IN_UPDO_GENE <- subset(DSE_TAF2, EXPERIMENT == 'TAF2∆IDR' & DIRECTION %in% c("UP", "DOWN") & AS_TYPE == "Intron") |>
  subset(!is.na(GENE)) |> pull(GENE) |> unique()

message("Genes with differentially spliced exons in ∆IDR: ", length(dIDR_EX_UPDO_GENE))
Genes with differentially spliced exons in ∆IDR: 413
Code
message("Genes with differentially spliced introns in ∆IDR: ", length(dIDR_IN_UPDO_GENE))
Genes with differentially spliced introns in ∆IDR: 182
Code
SRRM2_EX_UPDO_GENE <- subset(DSE_SRRM2, EXPERIMENT == 'HeLa-SRRM2-KD' & DIRECTION %in% c("UP", "DOWN") & AS_TYPE == "Exon") |>
  subset(!is.na(GENE)) |> pull(GENE) |> unique()

SRRM2_IN_UPDO_GENE <- subset(DSE_SRRM2, EXPERIMENT == 'HeLa-SRRM2-KD' & DIRECTION %in% c("UP", "DOWN") & AS_TYPE == "Intron") |>
  subset(!is.na(GENE)) |> pull(GENE) |> unique()

message("Genes with differentially spliced exons in ∆IDR: ", length(SRRM2_EX_UPDO_GENE))
Genes with differentially spliced exons in ∆IDR: 699
Code
message("Genes with differentially spliced introns in ∆IDR: ", length(SRRM2_IN_UPDO_GENE))
Genes with differentially spliced introns in ∆IDR: 252

Discard the genes in the foreground from the background

Code
BG_GENEs <- BG_GENEs[!(BG_GENEs %in% c(TAF2_EX_UPDO_GENE, TAF2_IN_UPDO_GENE,
                                       dIDR_EX_UPDO_GENE, dIDR_IN_UPDO_GENE,
                                       SRRM2_EX_UPDO_GENE, SRRM2_IN_UPDO_GENE))]
message('Background gene names: ', length(BG_GENEs) ) # 10196
Background gene names: 10196

Create a GO terms analysis input data frame.

Code
gene_lists <- list(TAF2_EX_UPDO_GENE, TAF2_IN_UPDO_GENE,
                   dIDR_EX_UPDO_GENE, dIDR_IN_UPDO_GENE,
                   SRRM2_EX_UPDO_GENE, SRRM2_IN_UPDO_GENE,
                   BG_GENEs)

df_list <- lapply(gene_lists, function(x) as.data.frame(t(x)))
df <- do.call(bind_rows, df_list) |> t()

colnames(df) <- c("TAF2_EX_UPDO_GENE", "TAF2_IN_UPDO_GENE",
                  "dIDR_EX_UPDO_GENE", "dIDR_IN_UPDO_GENE",
                  "SRRM2_EX_UPDO_GENE", "SRRM2_IN_UPDO_GENE",
                  "BG_GENEs")

Check input length.

Code
apply(df, 2, FUN = function(x){ length(which(!is.na(x))) })
 TAF2_EX_UPDO_GENE  TAF2_IN_UPDO_GENE  dIDR_EX_UPDO_GENE  dIDR_IN_UPDO_GENE 
               436                193                413                182 
SRRM2_EX_UPDO_GENE SRRM2_IN_UPDO_GENE           BG_GENEs 
               699                252              10196 

Save to table.

Code
write_delim(x = as.data.frame(df), file = file.path(tbl_dir_fig, 'GO_Terms_Genes_Input.tsv'), 
            delim = '\t', col_names = T, quote = 'none')

6.1 Compute GO termns

TAF2 exons UP/DOWN

Code
data.frame(gene_ids = c(TAF2_EX_UPDO_GENE, BG_GENEs),
           is_candidate = c( rep(1, length(TAF2_EX_UPDO_GENE)), 
                             rep(0, length(BG_GENEs)) )) |>
  run_GOfuncR_slim() -> TAF2_EX_UPDO_RES
Submiting results from GOfuncR to rrvgo...
rrvgo is now slimming BP GO terms from GOfuncR
There are 64 clusters in your GO BP terms from GOfuncR
rrvgo is now slimming CC GO terms from GOfuncR
There are 18 clusters in your GO CC terms from GOfuncR
rrvgo is now slimming MF GO terms from GOfuncR
There are 29 clusters in your GO MF terms from GOfuncR
Code
TAF2_EX_UPDO_DF <- TAF2_EX_UPDO_RES[[2]] |> mutate(GROUP = 'TAF2_EXONS')

TAF2 introns UP/DOWN

Code
data.frame(gene_ids = c(TAF2_IN_UPDO_GENE, BG_GENEs),
           is_candidate = c( rep(1, length(TAF2_IN_UPDO_GENE)), 
                             rep(0, length(BG_GENEs)) )) |>
  run_GOfuncR_slim() -> TAF2_IN_UPDO_RES
Submiting results from GOfuncR to rrvgo...
rrvgo is now slimming BP GO terms from GOfuncR
There are 49 clusters in your GO BP terms from GOfuncR
rrvgo is now slimming CC GO terms from GOfuncR
There are 8 clusters in your GO CC terms from GOfuncR
rrvgo is now slimming MF GO terms from GOfuncR
There are 26 clusters in your GO MF terms from GOfuncR
Code
TAF2_IN_UPDO_DF <- TAF2_IN_UPDO_RES[[2]] |> mutate(GROUP = 'TAF2_INTRONS')

∆IDR exons UP/DOWN

Code
data.frame(gene_ids = c(dIDR_EX_UPDO_GENE, BG_GENEs),
           is_candidate = c( rep(1, length(dIDR_EX_UPDO_GENE)),
                             rep(0, length(BG_GENEs)) )) |>
  run_GOfuncR_slim() -> dIDR_EX_UPDO_RES 
Submiting results from GOfuncR to rrvgo...
rrvgo is now slimming BP GO terms from GOfuncR
There are 50 clusters in your GO BP terms from GOfuncR
rrvgo is now slimming CC GO terms from GOfuncR
There are 16 clusters in your GO CC terms from GOfuncR
rrvgo is now slimming MF GO terms from GOfuncR
There are 26 clusters in your GO MF terms from GOfuncR
Code
dIDR_EX_UPDO_DF <- dIDR_EX_UPDO_RES[[2]] |> mutate(GROUP = '∆IDR_EXONS')

∆IDR introns UP/DOWN

Code
data.frame(gene_ids = c(dIDR_IN_UPDO_GENE, BG_GENEs),
           is_candidate = c( rep(1, length(dIDR_IN_UPDO_GENE) ), 
                             rep(0, length(BG_GENEs) ) )) |>
  run_GOfuncR_slim() -> dIDR_IN_UPDO_RES 
Submiting results from GOfuncR to rrvgo...
rrvgo is now slimming BP GO terms from GOfuncR
There are 57 clusters in your GO BP terms from GOfuncR
rrvgo is now slimming CC GO terms from GOfuncR
There are 14 clusters in your GO CC terms from GOfuncR
rrvgo is now slimming MF GO terms from GOfuncR
There are 28 clusters in your GO MF terms from GOfuncR
Code
dIDR_IN_UPDO_DF <- dIDR_IN_UPDO_RES[[2]] |>  mutate(GROUP = '∆IDR_INTRONS')

SRRM2 exons UP/DOWN

Code
data.frame(gene_ids = c(SRRM2_EX_UPDO_GENE, BG_GENEs),
           is_candidate = c( rep(1, length(SRRM2_EX_UPDO_GENE)), 
                             rep(0, length(BG_GENEs)) )) |>
  run_GOfuncR_slim() -> SRRM2_EX_UPDO_RES 
Submiting results from GOfuncR to rrvgo...
rrvgo is now slimming BP GO terms from GOfuncR
There are 40 clusters in your GO BP terms from GOfuncR
rrvgo is now slimming CC GO terms from GOfuncR
There are 12 clusters in your GO CC terms from GOfuncR
rrvgo is now slimming MF GO terms from GOfuncR
There are 17 clusters in your GO MF terms from GOfuncR
Code
SRRM2_EX_UPDO_DF <- SRRM2_EX_UPDO_RES[[2]] |> mutate(GROUP = 'SRRM2_EXONS') 

SRRM2 introns UP/DOWN

Code
data.frame(gene_ids = c(SRRM2_IN_UPDO_GENE, BG_GENEs),
           is_candidate = c( rep(1, length(SRRM2_IN_UPDO_GENE)), 
                             rep(0, length(BG_GENEs)) )) |>
  run_GOfuncR_slim() -> SRRM2_IN_UPDO_RES
Submiting results from GOfuncR to rrvgo...
rrvgo is now slimming BP GO terms from GOfuncR
There are 70 clusters in your GO BP terms from GOfuncR
rrvgo is now slimming CC GO terms from GOfuncR
There are 26 clusters in your GO CC terms from GOfuncR
rrvgo is now slimming MF GO terms from GOfuncR
There are 36 clusters in your GO MF terms from GOfuncR
Code
SRRM2_IN_UPDO_DF <- SRRM2_IN_UPDO_RES[[2]] |> mutate(GROUP = 'SRRM2_INTRONS')

Merge all results together in one table and save it to file.

Code
list_df_GOs <- list(TAF2_EX_UPDO_DF, dIDR_EX_UPDO_DF, SRRM2_EX_UPDO_DF, 
                    TAF2_IN_UPDO_DF, dIDR_IN_UPDO_DF, SRRM2_IN_UPDO_DF)

names(list_df_GOs) <- c('TAF2_EX_UPDO', 'dIDR_EX_UPDO', 'SRRM2_EX_UPDO', 
                        'TAF2_IN_UPDO', 'dIDR_IN_UPDO', 'SRRM2_IN_UPDO')

bind_rows(list_df_GOs) |>
  relocate(GROUP, .before = `Gene Ontology`) |>
  mutate(GROUP = factor(GROUP, c("TAF2_EXONS", "∆IDR_EXONS", "SRRM2_EXONS",
                                 "TAF2_INTRONS", "∆IDR_INTRONS", "SRRM2_INTRONS") ) ) -> all_GO_res

write_delim(x = all_GO_res,
            file = file.path(tbl_dir_fig, 'GO_Terms_Enrichment_Results_with_SlimGo.txt'), 
            delim = '\t', col_names = T, quote = 'all')

6.2 Plot GO termns

Decide what to plot.

Code
# TAFs exons and SRRM2 introns
one_3_shared <- Reduce(intersect, lapply(list(TAF2_EX_UPDO_DF, dIDR_EX_UPDO_DF, SRRM2_IN_UPDO_DF), function(x) x$Term ) )

sel_common_terms <- c('DNA-binding transcription factor activity', 
                      'RNA polymerase II transcription regulatory region sequence-specific DNA binding',
                      'cilium movement')

dIDR_specif_terms <- c('high voltage-gated calcium channel activity', 'hepatocyte differentiation', 
                       'negative regulation of microtubule binding')

sel_events <- c('gated channel activity', 'GTPase activator activity', 
                'histone H3K27 methyltransferase activity', 'gated channel activity',
                "cellular response to cholesterol", "Fanconi anaemia nuclear complex",
                "vagus nerve development", "transmembrane signaling receptor activity",
                "primary alcohol biosynthetic process",
                "meiosis I")
plot_terms <- c(sel_common_terms, sel_events, one_3_shared)
Code
subset(all_GO_res, Term %in% plot_terms) |>
  mutate(Terms = Term) |>
  mutate(Terms = gsub('RNA polymerase II transcription', 'pol II transc.', Terms)) |>
  mutate(Terms = gsub('sequence-specific DNA binding', 'TF binding', Terms)) |>
  mutate(Terms = gsub('membrane', 'membr.', Terms)) |>
  mutate(Terms = gsub('histone', '', Terms)) |>
  mutate(Terms = gsub('high', '', Terms)) |>
  mutate(Terms = gsub('negative', 'neg.', Terms)) |>
  mutate(Terms = gsub('transcription factor', 'TF', Terms)) |>
  mutate(Terms = gsub('projection', 'project.', Terms)) -> plot_go

Plot GO terms as a heatmap.

Code
ggplot(plot_go) +
  aes(x = GROUP, y = Terms, fill = `-log10.p-value`) +
  geom_tile(colour = 'black', linewidth = 0.2) +
  scale_x_discrete(labels = \(x) gsub(pattern = '_', replacement = '\n', x),
                   expand = expansion(0, 0)) +
  scale_y_discrete(expand = expansion(0, 0)) +
  scale_fill_viridis_c(direction = -1, name = '-log10 P val.') +
  guides( fill = guide_colourbar( barheight = unit(45, units = "mm"), 
                                  barwidth = unit(2, units = "mm") ) ) +
  theme_classic(base_family = 'Arial', base_size = 8) +
  theme(legend.title = element_blank(),
        axis.title = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_text(colour = 'black'),
        axis.text.x = element_text(angle = 45, hjust = 1)) -> p_GO
p_GO

Save to pdf and do small adjustments in Illustrator.

Code
ggsave(filename = 'Fig_6E_GO_terms_HM.pdf', plot = p_GO, device = cairo_pdf, 
       path = pdf_dir_fig, units = 'cm', width = 9, height = 6)

7 Exons and introns feature analysis

Some notes from my analysis with MATT from the command line

7.1 Prepare the data for MATT

7.1.1 Create Exons control sets

Create sets of up to 1000 exons in different categories.

AS non constitutive (ASNC)

Code
cd vast_tools
matt add_val vast_out/compare_2023_08_01/min_dPSI15_min_range0_max_dPSI5/AS_NC-HeLa_TAF2OE_vs_CNTRL_uniq_noB3_pIR-Max_dPSI5.tab GROUP AS_NC | grep -P "(HsaEX|EVENT)" | matt rand_rows - 1000 > temp_ASNC.tab

Cryptic exons (CR)

Code
matt add_val vast_out/compare_2023_08_01/min_dPSI15_min_range0_max_dPSI5/CR-HeLa_TAF2OE_vs_CNTRL_uniq_noB3_pIR.tab GROUP CR | grep -P "(HsaEX|EVENT)" | matt rand_rows - 1000 > temp_CR.tab

Constitutively spliced (CS)

Code
matt add_val vast_out/compare_2023_08_01/min_dPSI15_min_range0_max_dPSI5/CS-HeLa_TAF2OE_vs_CNTRL_uniq_noB3_pIR.tab GROUP CS | grep -P "(HsaEX|EVENT)" | matt rand_rows - 1000 > temp_CS.tab

Merge all in one file

Code
tail -n +2 temp_CR.tab | cat temp_ASNC.tab - > temp_ASNC_CR.tab
tail -n +2 temp_CS.tab | cat temp_ASNC_CR.tab - | cut -f 1-6,32 > temp_all.tab

Create a matt cmpr_exons compatible table

Code
matt get_vast temp_all.tab COORD FullCO COMPLEX LENGTH -gtf /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf > CONTROL_EXONS_AS_CR_CS.tab

7.1.2 Create introns control sets

Create sets of up to 1000 introns in different categories.

AS non constitutive (ASNC)

Code
matt add_val vast_out/compare_2023_08_01/min_dPSI15_min_range0_max_dPSI5/AS_NC-HeLa_TAF2OE_vs_CNTRL_uniq_noB3_pIR-Max_dPSI5.tab GROUP AS_NC | grep -P "(HsaIN|EVENT)" | matt rand_rows - 1000 > temp_ASNC_INTRONS.tab

Cryptic introns (CR)

Code
matt add_val vast_out/compare_2023_08_01/min_dPSI15_min_range0_max_dPSI5/CR-HeLa_TAF2OE_vs_CNTRL_uniq_noB3_pIR.tab GROUP CR | grep -P "(HsaIN|EVENT)" | matt rand_rows - 1000 > temp_CR_INTRON.tab

Constitutively spliced (CS)

Code
matt add_val vast_out/compare_2023_08_01/min_dPSI15_min_range0_max_dPSI5/CS-HeLa_TAF2OE_vs_CNTRL_uniq_noB3_pIR.tab GROUP CS | grep -P "(HsaIN|EVENT)" | matt rand_rows - 1000 > temp_CS_INTRON.tab

Merge all in 1 file. Column 32 is the GROUP columns

Code
tail -n +2 temp_CR_INTRON.tab | cat temp_ASNC_INTRONS.tab - > temp_ASNC_CR.tab
tail -n +2 temp_CS_INTRON.tab | cat temp_ASNC_CR.tab - | cut -f 1-6,32 > temp_all.tab

Create a matt cmpr_introns compatible table

Code
matt get_vast temp_all.tab COORD FullCO COMPLEX LENGTH -gtf /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf > CONTROL_INTRONS_AS_CR_CS.tab

The same thing was done for SRRM2 dataset to have a tailor made control set.

7.1.3 Prepare TAF2 exons and introns input for MATT

Code
TAF2_UP_EX <- subset(DSE_TAF2, AS_TYPE == 'Exon' & DIRECTION == 'UP') |>
  subset(EXPERIMENT == 'TAF2' ) |>
  select(!c(EXPERIMENT, AS_TYPE, Sample, PSI, dPSI_TAF2, dPSI_TAF2dIDR, Quality_Score_Value, DIRECTION)) |>
  unique() |>
  mutate(GROUP = 'UP_EXONS_TAF2') # 251 exons

TAF2_DOWN_EX <- subset(DSE_TAF2, AS_TYPE == 'Exon' & DIRECTION == 'DOWN') |>
  subset(EXPERIMENT == 'TAF2' ) |>
  select(!c(EXPERIMENT, AS_TYPE, Sample, PSI, dPSI_TAF2, dPSI_TAF2dIDR, Quality_Score_Value, DIRECTION)) |>
  unique() |>
  mutate(GROUP = 'DOWN_EXONS_TAF2') # 275 exons
Code
dIDR_UP_EX <- subset(DSE_TAF2, AS_TYPE == 'Exon' & DIRECTION == 'UP') |>
  subset(EXPERIMENT == 'TAF2∆IDR' ) |>
  select(!c(EXPERIMENT, AS_TYPE, Sample, PSI, dPSI_TAF2, dPSI_TAF2dIDR, Quality_Score_Value, DIRECTION)) |>
  unique() |>
  mutate(GROUP = 'UP_EXONS_TAF2dIDR') # 234 exons

dIDR_DOWN_EX <- subset(DSE_TAF2, AS_TYPE == 'Exon' & DIRECTION == 'DOWN') |>
  subset(EXPERIMENT == 'TAF2∆IDR' ) |>
  select(!c(EXPERIMENT, AS_TYPE, Sample, PSI, dPSI_TAF2, dPSI_TAF2dIDR, Quality_Score_Value, DIRECTION)) |>
  unique() |>
  mutate(GROUP = 'DOWN_EXONS_TAF2dIDR') # 255 exons
Code
TAF2_UP_IN <- subset(DSE_TAF2, AS_TYPE == 'Intron' & DIRECTION == 'UP') |>
  subset(EXPERIMENT == 'TAF2' ) |>
  select(!c(EXPERIMENT, AS_TYPE, Sample, PSI, dPSI_TAF2, dPSI_TAF2dIDR, Quality_Score_Value, DIRECTION)) |>
  unique() |>
  mutate(GROUP = 'UP_INTRONS_TAF2') # 120 exons

TAF2_DOWN_IN <- subset(DSE_TAF2, AS_TYPE == 'Intron' & DIRECTION == 'DOWN') |>
  subset(EXPERIMENT == 'TAF2' ) |>
  select(!c(EXPERIMENT, AS_TYPE, Sample, PSI, dPSI_TAF2, dPSI_TAF2dIDR, Quality_Score_Value, DIRECTION)) |>
  unique() |>
  mutate(GROUP = 'DOWN_INTRONS_TAF2') # 99 exons
Code
dIDR_UP_IN <- subset(DSE_TAF2, AS_TYPE == 'Intron' & DIRECTION == 'UP') |>
  subset(EXPERIMENT == 'TAF2∆IDR' ) |>
  select(!c(EXPERIMENT, AS_TYPE, Sample, PSI, dPSI_TAF2, dPSI_TAF2dIDR, Quality_Score_Value, DIRECTION)) |>
  unique() |>
  mutate(GROUP = 'UP_INTRONS_TAF2dIDR') # 120 exons

dIDR_DOWN_IN <- subset(DSE_TAF2, AS_TYPE == 'Intron' & DIRECTION == 'DOWN') |>
  subset(EXPERIMENT == 'TAF2∆IDR' ) |>
  select(!c(EXPERIMENT, AS_TYPE, Sample, PSI, dPSI_TAF2, dPSI_TAF2dIDR, Quality_Score_Value, DIRECTION)) |>
  unique() |>
  mutate(GROUP = 'DOWN_INTRONS_TAF2dIDR') # 120 exons

Write input to files temporarily to file.

Code
write_delim(x = TAF2_UP_EX, file = file.path(dse_dir_TAF2, 'TMP_UP_EXONS_HeLa_TAF2.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

write_delim(x = TAF2_DOWN_EX, file = file.path(dse_dir_TAF2, 'TMP_DOWN_EXONS_HeLa_TAF2.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

write_delim(x = TAF2_UP_IN, file = file.path(dse_dir_TAF2, 'TMP_UP_INTRONS_HeLa_TAF2.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

write_delim(x = TAF2_DOWN_IN, file = file.path(dse_dir_TAF2, 'TMP_DOWN_INTRONS_HeLa_TAF2.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)
Code
write_delim(x = dIDR_UP_EX, file = file.path(dse_dir_TAF2, 'TMP_UP_EXONS_HeLa_dIDR.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

write_delim(x = dIDR_DOWN_EX, file = file.path(dse_dir_TAF2, 'TMP_DOWN_EXONS_HeLa_dIDR.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

write_delim(x = dIDR_UP_IN, file = file.path(dse_dir_TAF2, 'TMP_UP_INTRONS_HeLa_dIDR.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

write_delim(x = dIDR_DOWN_IN, file = file.path(dse_dir_TAF2, 'TMP_DOWN_INTRONS_HeLa_dIDR.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

Annotate these exons with matt get_vast

Code
cd ../diff_spliced_IDs/TAF2-OE/

matt get_vast TMP_UP_EXONS_HeLa_TAF2.tab COORD FullCO COMPLEX LENGTH -gtf /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf > UP_EXONS_HeLa_TAF2.tab

matt get_vast TMP_DOWN_EXONS_HeLa_TAF2.tab COORD FullCO COMPLEX LENGTH -gtf /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf > DOWN_EXONS_HeLa_TAF2.tab

matt get_vast TMP_UP_INTRONS_HeLa_TAF2.tab COORD FullCO COMPLEX LENGTH -gtf /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf > UP_INTRONS_HeLa_TAF2.tab

matt get_vast TMP_DOWN_INTRONS_HeLa_TAF2.tab COORD FullCO COMPLEX LENGTH -gtf /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf > DOWN_INTRONS_HeLa_TAF2.tab

Annotate these introns with matt get_vast

Code
matt get_vast TMP_UP_EXONS_HeLa_dIDR.tab COORD FullCO COMPLEX LENGTH -gtf /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf > UP_EXONS_HeLa_dIDR.tab

matt get_vast TMP_DOWN_EXONS_HeLa_dIDR.tab COORD FullCO COMPLEX LENGTH -gtf /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf > DOWN_EXONS_HeLa_dIDR.tab

matt get_vast TMP_UP_INTRONS_HeLa_dIDR.tab COORD FullCO COMPLEX LENGTH -gtf /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf > UP_INTRONS_HeLa_dIDR.tab

matt get_vast TMP_DOWN_INTRONS_HeLa_dIDR.tab COORD FullCO COMPLEX LENGTH -gtf /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf > DOWN_INTRONS_HeLa_dIDR.tab

Combine all exon tables into one single MATT input table.

Code
bind_rows(
  read_delim(file = file.path(dse_dir_TAF2, 'UP_EXONS_HeLa_TAF2.tab'), delim = '\t', show_col_types = FALSE),
  read_delim(file = file.path(dse_dir_TAF2, 'DOWN_EXONS_HeLa_TAF2.tab'), delim = '\t', show_col_types = FALSE),
  read_delim(file = file.path(dse_dir_TAF2, 'UP_EXONS_HeLa_dIDR.tab'), delim = '\t', show_col_types = FALSE),
  read_delim(file = file.path(dse_dir_TAF2, 'DOWN_EXONS_HeLa_dIDR.tab'), delim = '\t', show_col_types = FALSE),
  read_delim(file = TAF2_MATT_CNTLR_EXONS_path, delim = '\t', show_col_types = FALSE)
  ) |>
  write_delim(file = file.path(dse_dir_TAF2, 'MATT_INPUT_EXONS_TAF2_HeLa.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

Run the job in the cluster

Code
qsub -q long-centos79,short-centos79 -V -cwd -pe smp 4 -terse \
     -l virtual_free=12G -l h_rt=00:45:15 -N matt_exon_feat_taf2 -m ea -b y \
     matt cmpr_exons MATT_INPUT_EXONS_TAF2_HeLa.tab START END SCAFFOLD STRAND GENEID \
     /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf /no_backup/mirimia/genome_seqs/Hsa38_gDNA.fasta Hsap 150 \
     GROUP[UP_EXONS_TAF2,DOWN_EXONS_TAF2,UP_EXONS_TAF2dIDR,DOWN_EXONS_TAF2dIDR,CR,CS,AS_NC] \
     EXONS_FEATURES_HeLa -notrbts -colors:red,blue,red,blue,white,lightgray,darkgray

Combine all intron tables into one single MATT input table.

Code
bind_rows(
  read_delim(file = file.path(dse_dir_TAF2, 'UP_INTRONS_HeLa_TAF2.tab'), delim = '\t', show_col_types = FALSE),
  read_delim(file = file.path(dse_dir_TAF2, 'DOWN_INTRONS_HeLa_TAF2.tab'), delim = '\t', show_col_types = FALSE),
  read_delim(file = file.path(dse_dir_TAF2, 'UP_INTRONS_HeLa_dIDR.tab'), delim = '\t', show_col_types = FALSE),
  read_delim(file = file.path(dse_dir_TAF2, 'DOWN_INTRONS_HeLa_dIDR.tab'), delim = '\t', show_col_types = FALSE),
  read_delim(file = TAF2_MATT_CNTLR_INTRONS_path, delim = '\t', show_col_types = FALSE)
  ) |>
  write_delim(file = file.path(dse_dir_TAF2, 'MATT_INPUT_INTRONS_TAF2_HeLa.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

Launch the job on the cluster

Code
qsub -q long-centos79,short-centos79 -V -cwd -pe smp 4 -terse \
     -l virtual_free=12G -l h_rt=01:15:15 -N matt_intron_feat_taf2 -m ea -b y \
     matt cmpr_introns MATT_INPUT_INTRONS_TAF2_HeLa.tab START END SCAFFOLD STRAND GENEID \
     /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf /no_backup/mirimia/genome_seqs/Hsa38_gDNA.fasta Hsap 150 \
     GROUP[UP_INTRONS_TAF2,DOWN_INTRONS_TAF2,UP_INTRONS_TAF2dIDR,DOWN_INTRONS_TAF2dIDR,CR,CS,AS_NC] \
     INTRONS_FEATURES_HeLa -notrbts -colors:red,blue,red,blue,white,lightgray,darkgray

Read the pdf report from MATT.

To easily import the results into R I remove the sequence from the efeatures.tab file with:

Code
cd EXONS_FEATURES_HeLa
cut -f 1-102 MATT_INPUT_EXONS_TAF2_HeLa_with_efeatures.tab > MATT_OUTPUT_EXONS_TAF2_HeLa_with_efeatures_NOSEQ.tab
Code
cd ../INTRONS_FEATURES_HeLa
cut -f 1-71 MATT_INPUT_INTRONS_TAF2_HeLa_with_ifeatures.tab > MATT_OUTPUT_INTRONS_TAF2_HeLa_with_ifeatures_NOSEQ.tab

7.1.4 Prepare SRRM2 exons and introns input for MATT

Code
SRRM2_UP_EX <- subset(DSE_SRRM2, AS_TYPE == 'Exon' & DIRECTION == 'UP') |>
  select(!c(EXPERIMENT, AS_TYPE, Sample, PSI, dPSI_SRRM2_HeLa, dPSI_SRRM2_HeLa, Quality_Score_Value, DIRECTION)) |>
  unique() |>
  mutate(GROUP = 'UP_EXONS_HeLa_SRRM2-KD') # 380 exons

SRRM2_DOWN_EX <- subset(DSE_SRRM2, AS_TYPE == 'Exon' & DIRECTION == 'DOWN') |>
  select(!c(EXPERIMENT, AS_TYPE, Sample, PSI, dPSI_SRRM2_HeLa, dPSI_SRRM2_HeLa, Quality_Score_Value, DIRECTION)) |>
  unique() |>
  mutate(GROUP = 'DOWN_EXONS_HeLa_SRRM2-KD') # 444 exons
Code
SRRM2_UP_IN <- subset(DSE_SRRM2, AS_TYPE == 'Intron' & DIRECTION == 'UP') |>
  select(!c(EXPERIMENT, AS_TYPE, Sample, PSI, dPSI_SRRM2_HeLa, dPSI_SRRM2_HeLa, Quality_Score_Value, DIRECTION)) |>
  unique() |>
  mutate(GROUP = 'UP_INTRONS_HeLa_SRRM2-KD') # 164 exons

SRRM2_DOWN_IN <- subset(DSE_SRRM2, AS_TYPE == 'Intron' & DIRECTION == 'DOWN') |>
  select(!c(EXPERIMENT, AS_TYPE, Sample, PSI, dPSI_SRRM2_HeLa, dPSI_SRRM2_HeLa, Quality_Score_Value, DIRECTION)) |>
  unique() |>
  mutate(GROUP = 'DOWN_INTRONS_HeLa_SRRM2-KD') # 444 exons

Save to a temporary table.

Code
write_delim(x = SRRM2_UP_EX, file = file.path(dse_dir_SRRM2, 'TMP_UP_EXONS_HeLa_SRRM2-KD.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

write_delim(x = SRRM2_DOWN_EX, file = file.path(dse_dir_SRRM2, 'TMP_DOWN_EXONS_HeLa_SRRM2-KD.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

write_delim(x = SRRM2_UP_IN, file = file.path(dse_dir_SRRM2, 'TMP_UP_INTRONS_HeLa_SRRM2-KD.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

write_delim(x = SRRM2_DOWN_IN, file = file.path(dse_dir_SRRM2, 'TMP_DOWN_INTRONS_HeLa_SRRM2-KD.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

Annotate these events with matt get_vast

Code
cd ../SRRM2-KD/
matt get_vast TMP_UP_EXONS_HeLa_SRRM2-KD.tab COORD FullCO COMPLEX LENGTH -gtf /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf > UP_EXONS_HeLa_SRRM2-KD.tab

matt get_vast TMP_DOWN_EXONS_HeLa_SRRM2-KD.tab COORD FullCO COMPLEX LENGTH -gtf /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf > DOWN_EXONS_HeLa_SRRM2-KD.tab

matt get_vast TMP_UP_INTRONS_HeLa_SRRM2-KD.tab COORD FullCO COMPLEX LENGTH -gtf /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf > UP_INTRONS_HeLa_SRRM2-KD.tab

matt get_vast TMP_DOWN_INTRONS_HeLa_SRRM2-KD.tab COORD FullCO COMPLEX LENGTH -gtf /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf > DOWN_INTRONS_HeLa_SRRM2-KD.tab

Combine all exon tables into one table.

Code
bind_rows(
  read_delim(file = file.path(dse_dir_SRRM2, 'UP_EXONS_HeLa_SRRM2-KD.tab'), delim = '\t', show_col_types = FALSE),
  read_delim(file = file.path(dse_dir_SRRM2, 'DOWN_EXONS_HeLa_SRRM2-KD.tab'), delim = '\t', show_col_types = FALSE),
  read_delim(file = SRRM2_MATT_CNTRLS_EXONS_path, delim = '\t', show_col_types = FALSE)
  ) |>
  write_delim(file = file.path(dse_dir_SRRM2, 'MATT_INPUT_EXONS_SRRM2_HeLa.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

Run the job in the cluster

Code
qsub -q long-centos79,short-centos79 -V -cwd -pe smp 4 -terse \
     -l virtual_free=12G -l h_rt=00:30:15 -N matt_exon_features -m ea -b y \
     matt cmpr_exons MATT_INPUT_EXONS_SRRM2_HeLa.tab START END SCAFFOLD STRAND GENEID \
     /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf /no_backup/mirimia/genome_seqs/Hsa38_gDNA.fasta Hsap 150 \
     GROUP[UP_EXONS_HeLa_SRRM2-KD,DOWN_EXONS_HeLa_SRRM2-KD,CR,CS,AS_NC] \
     EXONS_FEATURES_HeLa -notrbts -colors:red,blue,white,lightgray,darkgray

Combine all intron tables into one table.

Code
bind_rows(
  read_delim(file = file.path(dse_dir_SRRM2, 'UP_INTRONS_HeLa_SRRM2-KD.tab'), delim = '\t', show_col_types = FALSE),
  read_delim(file = file.path(dse_dir_SRRM2, 'DOWN_INTRONS_HeLa_SRRM2-KD.tab'), delim = '\t', show_col_types = FALSE),
  read_delim(file = SRRM2_MATT_CNTRLS_INTRONS_path, delim = '\t', show_col_types = FALSE)
  ) |>
  write_delim(file = file.path(dse_dir_SRRM2, 'MATT_INPUT_INTRONS_SRRM2_HeLa.tab'), 
            delim = '\t', quote = 'none', col_names = TRUE)

Launch the job on the

Code
qsub -q long-centos79,short-centos79 -V -cwd -pe smp 4 -terse \
     -l virtual_free=12G -l h_rt=02:30:15 -N matt_intron_features -m ea -b y \
     matt cmpr_introns MATT_INPUT_INTRONS_SRRM2_HeLa.tab START END SCAFFOLD STRAND GENEID \
     /no_backup/mirimia/genome_annots/ensembl/Hsa38.gtf /no_backup/mirimia/genome_seqs/Hsa38_gDNA.fasta Hsap 150 \
     GROUP[UP_INTRONS_HeLa_SRRM2-KD,DOWN_INTRONS_HeLa_SRRM2-KD,CR,CS,AS_NC] \
     INTRONS_FEATURES_HeLa -notrbts -colors:red,blue,white,lightgray,darkgray

To easily import the results into R I remove the sequence from the efeatures.tab file with:

Code
cd EXONS_FEATURES_HeLa
cut -f 1-102 MATT_INPUT_EXONS_SRRM2_HeLa_with_efeatures.tab > MATT_OUTPUT_EXONS_SRRM2_HeLa_with_efeatures_NOSEQ.tab
Code
cd INTRONS_FEATURES_HeLa
cut -f 1-71 MATT_INPUT_INTRONS_SRRM2_HeLa_with_ifeatures.tab > MATT_OUTPUT_INTRONS_SRRM2_HeLa_with_ifeatures_NOSEQ.tab

7.2 Process MATT Analysis output

Import exon features from MATT table output into R and process the results with a custom-made function I wrote.

Code
Ex_feat_TAF2 <- read_delim(file = exon_feat_path_TAF2, delim = "\t",
                           escape_double = FALSE, trim_ws = TRUE,
                           show_col_types = FALSE, col_names = TRUE) 

Ex_feat_SRRM2 <- read_delim(file = exon_feat_path_SRRM2, delim = "\t",
                            escape_double = FALSE, trim_ws = TRUE, 
                            show_col_types = FALSE, col_names = TRUE) 

EF_TAF2 <- process_exon_feats(data = Ex_feat_TAF2)

EF_SRRM2 <- process_exon_feats(data = Ex_feat_SRRM2)

Combine the 2 dataset into one dataframe.

Code
EF_TAF2_SRRM2 <- rbind( EF_TAF2,  EF_SRRM2)

Keep only most important features to simplify the plot.

Code
LEN_feats <- c('UPEXON_MEDIANLENGTH', 'UPINTRON_MEDIANLENGTH', 'EXON_LENGTH', 'DOINTRON_MEDIANLENGTH', 'DOEXON_MEDIANLENGTH')
GCC_feats <- c('UPEXON_GCC', 'UPINTRON_GCC', 'EXON_GCC', 'DOINTRON_GCC', 'DOEXON_GCC')
MAX_ENT_SCR_feats <- c('MAXENTSCR_HSAMODEL_UPSTRM_5SS', 'MAXENTSCR_HSAMODEL_3SS', 
                       'MAXENTSCR_HSAMODEL_5SS', 'MAXENTSCR_HSAMODEL_DOWNSTRM_3SS')
EX_FEAT_SEL <- c(LEN_feats, GCC_feats, MAX_ENT_SCR_feats,
                 'EXON_MEDIANRELATIVERANK', 
                 'MEDIAN_POLYPYRITRAC_OFFSET_UPINTRON', 'MEDIAN_POLYPYRITRAC_OFFSET_DOINTRON',
                 'BPSCORE_MAXBP_UPINTRON', 'BPSCORE_MAXBP_DOINTRON')

Reshape data.

Code
cmb_EF <- EF_TAF2_SRRM2 |>
  subset( !CATEGORY %in% c('Ratio to Exon Length', 'Ratio to Exon GC %') ) |>
  subset(EX_FEAT %in% EX_FEAT_SEL) |>
  subset(GROUP != 'CS') |>
  subset(GROUP != 'CR') |>
  mutate(GROUP2 = GROUP) |>
  mutate(GROUP2 = str_remove(pattern = 'HeLa_', string = GROUP2),
         GROUP2 = str_remove(pattern = '-KD', string = GROUP2),
         GROUP2 = str_replace(pattern = 'DOWN_', replacement = "D_", string = GROUP2),
         GROUP2 = str_replace(pattern = 'UP_', replacement = "U_", string = GROUP2),
         GROUP2 = str_replace(pattern = '_TAF2dIDR', replacement = "_dIDR", string = GROUP2),
         GROUP2 = str_remove(pattern = 'EXONS_', string = GROUP2) )

Calculate a mean z-Score for each feature at different locations.

Code
GROUP2_fct <- rev(c("D_SRRM2", "D_dIDR",  "D_TAF2", "AS_NC", "U_SRRM2", "U_dIDR", "U_TAF2"))

cmb_EF |>
  group_by(CATEGORY, LOCATION, EX_FEAT, GROUP, GROUP2) |>
  summarise(mean_zF_ASNC = mean(zF_ASNC, na.rm = TRUE), 
            .groups = 'keep') |>
   mutate(GROUP2 = factor(GROUP2, levels = GROUP2_fct) ) -> cmbnd_EF_cat

Select columns and prepare the dataset to run a non-parametric Wilcoxon test for each exon feature. It compares the VALUE (feature numeric values) between groups in GROUP2 (type of feature) using 'AS_NC' (alternatively spliced non changing exons) as the reference, then adjusts the P values with the BH (Benjamini–Hochberg) method and adds significance labels to the results.

Code
cmb_EF |>
  select(c(GROUP2, EVENT, EX_FEAT, VALUE)) |>
  droplevels() |>
  group_by(EX_FEAT) |>
  wilcox_test(formula = VALUE ~ GROUP2, ref.group = 'AS_NC', alternative = "two.sided", 
         paired = F, p.adjust.method = 'none', conf.level = 0.95, detailed = F) |>
  adjust_pvalue(method = "BH") |> add_significance("p.adj") -> res_test_EF

Reshape test results dataframe for plot. Remove non significant (ns) features.

Code
tidy_res_test_EF <- subset(res_test_EF, p.adj.signif != 'ns') |>
  select(EX_FEAT, group2, p, p.adj, p.adj.signif) |>
  process_exon_feats_test() |>
  subset( !CATEGORY %in% c('Ratio to Exon Length', 'Ratio to Exon GC %') ) 
message('Number of significant features: ', nrow(tidy_res_test_EF) )
Number of significant features: 16

7.3 Plot Exon Features Heatmap

Here facet_row splits the plot into multiple rows, with each row representing a different level of the CATEGORY variable. It also allows each row to have its own x-axis scale and custom labels (via labeller) for clearer facet titles.

Code
ggplot(cmbnd_EF_cat) +
  aes(x = LOCATION, y = GROUP2) +
  facet_row( ~CATEGORY, scales = 'free_x', space = "free",
             labeller = labeller(CATEGORY = c("S.S. Score" = "Max Entropy Score",
                                              "Poly Y Track" = "PYT offset\nfrom 3'ss",
                                              "Branch Point" = "Branch Point\nScore",
                                              "Rel. Position" = "Rel.\nPosition")
                                 ) ) +
  geom_tile(aes(fill = mean_zF_ASNC), colour = 'black') +
  # Add significance annotations from tidy_res_test_EF:
  geom_text(data = tidy_res_test_EF, inherit.aes = F, 
            mapping = aes(x = LOCATION, y = group2, label = p.adj.signif),
            size = 2, color = "black", vjust = 0.75) +
  scale_fill_gradient2(low = 'dodgerblue3', mid = 'white', high = 'firebrick3', 
                       midpoint = 0, name = 'Feature Mean Z-Score', na.value = 'gray') +
  scale_x_discrete(expand = expansion(mult = 0, add = 0)) +
  scale_y_discrete(expand = expansion(mult = 0, add = 0)) +
  coord_cartesian(clip = 'off') +
  guides(
    fill = guide_colourbar(
      barheight = unit(2.5, units = "mm"),
      barwidth = unit(8, units = "cm"),
      title.vjust = 1 ) ) +
  theme(axis.text = element_text(colour = 'black', family = 'Arial'),
        axis.text.x = element_text(angle = 45, hjust = 1),
        axis.title = element_blank(),
        axis.ticks = element_blank(),
        strip.background = element_blank(),
        panel.background = element_blank(),
        panel.border = element_blank(),
        panel.grid = element_blank(),
        plot.background = element_blank(),
        legend.position = 'bottom')  -> p_heatmap
p_heatmap

Save to pdf and improve in Illustrator.

Code
ggsave(filename = 'Fig_6H_EX_FEATURES_HM.pdf', plot = p_heatmap, 
       device = cairo_pdf, path = pdf_dir_fig, units = 'cm', width = 16,
       height = 7)

End of the analysis.

8 Session Info

Code
sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.3.2 (2023-10-31)
 os       macOS Monterey 12.7.3
 system   x86_64, darwin20
 ui       X11
 language (EN)
 collate  C
 ctype    UTF-8
 tz       Europe/Madrid
 date     2025-03-04
 pandoc   3.4 @ /Applications/quarto/bin/tools/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 ! package                           * version    date (UTC) lib source
   abind                               1.4-5      2016-07-21 [1] CRAN (R 4.3.0)
   ade4                                1.7-22     2023-02-06 [1] CRAN (R 4.3.0)
   annotate                            1.80.0     2023-10-24 [1] Bioconductor
   AnnotationDbi                     * 1.64.1     2023-11-03 [1] Bioconductor
   AnnotationFilter                    1.26.0     2023-10-24 [1] Bioconductor
   AnnotationHub                       3.10.1     2024-04-05 [1] Bioconductor 3.18 (R 4.3.3)
   annotatr                            1.28.0     2023-10-24 [1] Bioconductor
   ape                                 5.8        2024-04-11 [1] CRAN (R 4.3.2)
   aplot                               0.2.3      2024-06-17 [1] CRAN (R 4.3.3)
   askpass                             1.2.0      2023-09-03 [1] CRAN (R 4.3.0)
   backports                           1.5.0      2024-05-23 [1] CRAN (R 4.3.3)
   base64                              2.0.1      2022-08-19 [1] CRAN (R 4.3.0)
   base64enc                           0.1-3      2015-07-28 [1] CRAN (R 4.3.0)
   beachmat                            2.18.1     2024-02-14 [1] Bioconductor 3.18 (R 4.3.2)
   beanplot                            1.3.1      2022-04-09 [1] CRAN (R 4.3.0)
   Biobase                           * 2.62.0     2023-10-24 [1] Bioconductor
   BiocFileCache                       2.10.2     2024-03-27 [1] Bioconductor 3.18 (R 4.3.3)
   BiocGenerics                      * 0.48.1     2023-11-01 [1] Bioconductor
   BiocIO                              1.12.0     2023-10-24 [1] Bioconductor
   BiocManager                         1.30.25    2024-08-28 [1] CRAN (R 4.3.3)
   BiocParallel                        1.36.0     2023-10-24 [1] Bioconductor
   BiocSingular                        1.18.0     2023-10-24 [1] Bioconductor
   BiocVersion                         3.18.1     2023-11-15 [1] Bioconductor
   biocViews                           1.70.0     2023-10-24 [1] Bioconductor
   biomaRt                             2.58.2     2024-01-30 [1] Bioconductor 3.18 (R 4.3.2)
   Biostrings                          2.70.3     2024-03-13 [1] Bioconductor 3.18 (R 4.3.3)
   bit                                 4.0.5      2022-11-15 [1] CRAN (R 4.3.0)
   bit64                               4.0.5      2020-08-30 [1] CRAN (R 4.3.0)
   bitops                              1.0-8      2024-07-29 [1] CRAN (R 4.3.3)
   blob                                1.2.4      2023-03-17 [1] CRAN (R 4.3.0)
   boot                                1.3-31     2024-08-28 [1] CRAN (R 4.3.3)
   Boruta                              8.0.0      2022-11-12 [1] CRAN (R 4.3.0)
   broom                               1.0.6      2024-05-17 [1] CRAN (R 4.3.3)
   BSgenome                            1.70.2     2024-02-08 [1] Bioconductor 3.18 (R 4.3.2)
   bsseq                             * 1.38.0     2023-10-24 [1] Bioconductor
   bumphunter                          1.44.0     2023-10-24 [1] Bioconductor
   cachem                              1.1.0      2024-05-16 [1] CRAN (R 4.3.3)
   Cairo                               1.6-2      2023-11-28 [1] CRAN (R 4.3.0)
   car                                 3.1-2      2023-03-30 [1] CRAN (R 4.3.0)
   carData                             3.0-5      2022-01-06 [1] CRAN (R 4.3.0)
   caTools                             1.18.3     2024-09-04 [1] CRAN (R 4.3.3)
   cellranger                          1.1.0      2016-07-27 [1] CRAN (R 4.3.0)
   checkmate                           2.3.2      2024-07-29 [1] CRAN (R 4.3.3)
   ChIPseeker                          1.38.0     2023-10-24 [1] Bioconductor
   circlize                            0.4.16     2024-02-20 [1] CRAN (R 4.3.2)
   class                               7.3-22     2023-05-03 [1] CRAN (R 4.3.2)
   cli                                 3.6.3      2024-06-21 [1] CRAN (R 4.3.3)
   cluster                             2.1.6      2023-12-01 [1] CRAN (R 4.3.0)
   CMplot                              4.5.1      2024-01-19 [1] CRAN (R 4.3.0)
   codetools                           0.2-20     2024-03-31 [1] CRAN (R 4.3.2)
   colorspace                          2.1-1      2024-07-26 [1] CRAN (R 4.3.3)
   cowplot                             1.1.3      2024-01-22 [1] CRAN (R 4.3.2)
   crayon                              1.5.3      2024-06-20 [1] CRAN (R 4.3.3)
   csaw                                1.36.1     2023-12-18 [1] Bioconductor 3.18 (R 4.3.2)
   curl                                5.2.2      2024-08-26 [1] CRAN (R 4.3.3)
   data.table                          1.16.0     2024-08-27 [1] CRAN (R 4.3.3)
   DBI                                 1.2.3      2024-06-02 [1] CRAN (R 4.3.3)
   dbplyr                              2.5.0      2024-03-19 [1] CRAN (R 4.3.2)
   DelayedArray                        0.28.0     2023-10-24 [1] Bioconductor
   DelayedMatrixStats                  1.24.0     2023-10-24 [1] Bioconductor
   desc                                1.4.3      2023-12-10 [1] CRAN (R 4.3.0)
   DESeq2                              1.42.1     2024-03-06 [1] Bioconductor 3.18 (R 4.3.3)
   devtools                            2.4.5      2022-10-11 [1] CRAN (R 4.3.0)
   digest                              0.6.37     2024-08-19 [1] CRAN (R 4.3.3)
   DMRichR                           * 1.7.8      2024-09-10 [1] Github (ben-laufer/DMRichR@6586211)
   dmrseq                            * 1.22.1     2024-02-20 [1] Bioconductor 3.18 (R 4.3.2)
   doParallel                          1.0.17     2022-02-07 [1] CRAN (R 4.3.0)
   doRNG                               1.8.6      2023-01-16 [1] CRAN (R 4.3.0)
   DOSE                                3.28.2     2023-12-10 [1] Bioconductor
   dplyr                             * 1.1.4      2023-11-17 [1] CRAN (R 4.3.0)
   dqrng                               0.4.1      2024-05-28 [1] CRAN (R 4.3.3)
   DT                                  0.33       2024-04-04 [1] CRAN (R 4.3.2)
   e1071                               1.7-14     2023-12-06 [1] CRAN (R 4.3.0)
   edgeR                               4.0.16     2024-02-18 [1] Bioconductor 3.18 (R 4.3.2)
   ellipsis                            0.3.2      2021-04-29 [1] CRAN (R 4.3.0)
   enrichplot                          1.22.0     2023-10-24 [1] Bioconductor
   enrichR                             3.2        2023-04-14 [1] CRAN (R 4.3.0)
   ensembldb                           2.26.0     2023-10-24 [1] Bioconductor
   evaluate                            0.24.0     2024-06-10 [1] CRAN (R 4.3.3)
   fansi                               1.0.6      2023-12-08 [1] CRAN (R 4.3.0)
   farver                              2.1.2      2024-05-13 [1] CRAN (R 4.3.3)
   fastmap                             1.2.0      2024-05-15 [1] CRAN (R 4.3.3)
   fastmatch                           1.1-4      2023-08-18 [1] CRAN (R 4.3.0)
   fgsea                               1.28.0     2023-10-24 [1] Bioconductor
   filelock                            1.0.3      2023-12-11 [1] CRAN (R 4.3.0)
   forcats                             1.0.0      2023-01-29 [1] CRAN (R 4.3.0)
   foreach                             1.5.2      2022-02-02 [1] CRAN (R 4.3.0)
   foreign                             0.8-87     2024-06-26 [1] CRAN (R 4.3.3)
   Formula                             1.2-5      2023-02-24 [1] CRAN (R 4.3.0)
   fs                                  1.6.4      2024-04-25 [1] CRAN (R 4.3.2)
   genefilter                          1.84.0     2023-10-24 [1] Bioconductor
   generics                            0.1.3      2022-07-05 [1] CRAN (R 4.3.0)
   GenomeInfoDb                      * 1.38.8     2024-03-15 [1] Bioconductor 3.18 (R 4.3.3)
   GenomeInfoDbData                    1.2.11     2024-02-28 [1] Bioconductor
   GenomicAlignments                   1.38.2     2024-01-16 [1] Bioconductor 3.18 (R 4.3.2)
   GenomicFeatures                     1.54.4     2024-03-13 [1] Bioconductor 3.18 (R 4.3.3)
   GenomicRanges                     * 1.54.1     2023-10-29 [1] Bioconductor
   GEOquery                            2.70.0     2023-10-24 [1] Bioconductor
   GetoptLong                          1.0.5      2020-12-15 [1] CRAN (R 4.3.0)
   ggalluvial                          0.12.5     2023-02-22 [1] CRAN (R 4.3.0)
   ggfittext                           0.10.2     2024-02-01 [1] CRAN (R 4.3.2)
   ggforce                           * 0.4.2      2024-02-19 [1] CRAN (R 4.3.2)
   ggfun                               0.1.6      2024-08-28 [1] CRAN (R 4.3.3)
   ggplot2                           * 3.5.1      2024-04-23 [1] CRAN (R 4.3.2)
   ggplotify                           0.1.2      2023-08-09 [1] CRAN (R 4.3.0)
   ggraph                              2.2.1      2024-03-07 [1] CRAN (R 4.3.2)
   ggrepel                             0.9.6      2024-09-07 [1] CRAN (R 4.3.3)
   ggsci                               3.2.0      2024-06-18 [1] CRAN (R 4.3.3)
   ggseqlogo                           0.2        2024-02-09 [1] CRAN (R 4.3.2)
   ggtree                              3.10.1     2024-02-25 [1] Bioconductor 3.18 (R 4.3.2)
   ggVennDiagram                     * 1.5.2      2024-02-20 [1] CRAN (R 4.3.2)
   Glimma                              2.12.0     2023-10-24 [1] Bioconductor
   GlobalOptions                       0.1.2      2020-06-10 [1] CRAN (R 4.3.0)
   glue                                1.7.0      2024-01-09 [1] CRAN (R 4.3.0)
   GO.db                               3.18.0     2024-05-20 [1] Bioconductor
   GOfuncR                           * 1.22.2     2024-02-04 [1] Bioconductor 3.18 (R 4.3.2)
   GOSemSim                            2.28.1     2024-01-17 [1] Bioconductor 3.18 (R 4.3.2)
   gplots                              3.1.3.1    2024-02-02 [1] CRAN (R 4.3.2)
   graph                               1.80.0     2023-10-24 [1] Bioconductor
   graphlayouts                        1.1.1      2024-03-09 [1] CRAN (R 4.3.2)
   gridBase                            0.4-7      2014-02-24 [1] CRAN (R 4.3.0)
   gridExtra                           2.3        2017-09-09 [1] CRAN (R 4.3.0)
   gridGraphics                        0.5-1      2020-12-13 [1] CRAN (R 4.3.0)
   gt                                  0.11.0     2024-07-09 [1] CRAN (R 4.3.3)
   gtable                              0.3.5      2024-04-22 [1] CRAN (R 4.3.2)
   gtools                              3.9.5      2023-11-20 [1] CRAN (R 4.3.0)
   hablar                              0.3.2      2023-03-12 [1] CRAN (R 4.3.0)
   HDF5Array                           1.30.1     2024-03-06 [1] Bioconductor 3.18 (R 4.3.3)
   HDO.db                              0.99.1     2024-05-20 [1] Bioconductor
   Hmisc                               5.1-3      2024-05-28 [1] CRAN (R 4.3.3)
   hms                                 1.1.3      2023-03-21 [1] CRAN (R 4.3.0)
   htmlTable                           2.4.3      2024-07-21 [1] CRAN (R 4.3.3)
   htmltools                           0.5.8.1    2024-04-04 [1] CRAN (R 4.3.2)
   htmlwidgets                         1.6.4      2023-12-06 [1] CRAN (R 4.3.0)
   httpuv                              1.6.15     2024-03-26 [1] CRAN (R 4.3.2)
   httr                                1.4.7      2023-08-15 [1] CRAN (R 4.3.0)
   igraph                              2.0.3      2024-03-13 [1] CRAN (R 4.3.2)
   illuminaio                          0.44.0     2023-10-24 [1] Bioconductor
   interactiveDisplayBase              1.40.0     2023-10-24 [1] Bioconductor
   IRanges                           * 2.36.0     2023-10-24 [1] Bioconductor
   irlba                               2.3.5.1    2022-10-03 [1] CRAN (R 4.3.0)
   iterators                           1.0.14     2022-02-05 [1] CRAN (R 4.3.0)
   jsonlite                            1.8.8      2023-12-04 [1] CRAN (R 4.3.0)
   KEGGREST                            1.42.0     2023-10-24 [1] Bioconductor
   KernSmooth                          2.23-24    2024-05-17 [1] CRAN (R 4.3.3)
   knitr                               1.48       2024-07-07 [1] CRAN (R 4.3.3)
   labeling                            0.4.3      2023-08-29 [1] CRAN (R 4.3.0)
   later                               1.3.2      2023-12-06 [1] CRAN (R 4.3.0)
   lattice                             0.22-6     2024-03-20 [1] CRAN (R 4.3.2)
   lazyeval                            0.2.2      2019-03-15 [1] CRAN (R 4.3.0)
   lifecycle                           1.0.4      2023-11-07 [1] CRAN (R 4.3.0)
   limma                               3.58.1     2023-10-31 [1] Bioconductor
   locfit                              1.5-9.10   2024-06-24 [1] CRAN (R 4.3.3)
   LOLA                                1.32.0     2023-10-24 [1] Bioconductor
   magrittr                            2.0.3      2022-03-30 [1] CRAN (R 4.3.0)
   mapplots                            1.5.2      2023-08-25 [1] CRAN (R 4.3.0)
   MASS                                7.3-60.0.1 2024-01-13 [1] CRAN (R 4.3.0)
   Matrix                              1.6-5      2024-01-11 [1] CRAN (R 4.3.0)
   MatrixGenerics                    * 1.14.0     2023-10-24 [1] Bioconductor
   matrixStats                       * 1.4.1      2024-09-08 [1] CRAN (R 4.3.3)
   mclust                              6.1.1      2024-04-29 [1] CRAN (R 4.3.2)
   memoise                             2.0.1      2021-11-26 [1] CRAN (R 4.3.0)
   metapod                             1.10.1     2023-12-24 [1] Bioconductor 3.18 (R 4.3.2)
   MetBrewer                           0.2.0      2022-03-21 [1] CRAN (R 4.3.0)
   mime                                0.12       2021-09-28 [1] CRAN (R 4.3.0)
   minfi                               1.48.0     2023-10-24 [1] Bioconductor
   miniUI                              0.1.1.1    2018-05-18 [1] CRAN (R 4.3.0)
   msa                                 1.34.0     2023-10-24 [1] Bioconductor
   multtest                            2.58.0     2023-10-24 [1] Bioconductor
   munsell                             0.5.1      2024-04-01 [1] CRAN (R 4.3.2)
 P niar                              * 0.3.0      2024-10-07 [?] Github (Ni-Ar/niar@491dcc6)
   nlme                                3.1-166    2024-08-14 [1] CRAN (R 4.3.3)
   NLP                                 0.3-0      2024-08-05 [1] CRAN (R 4.3.3)
   nnet                                7.3-19     2023-05-03 [1] CRAN (R 4.3.2)
   nor1mix                             1.3-3      2024-04-06 [1] CRAN (R 4.3.2)
   openssl                             2.2.1      2024-08-16 [1] CRAN (R 4.3.3)
   openxlsx                            4.2.7.1    2024-09-20 [1] CRAN (R 4.3.3)
   org.Hs.eg.db                      * 3.18.0     2024-05-20 [1] Bioconductor
   outliers                            0.15       2022-03-26 [1] CRAN (R 4.3.0)
   patchwork                         * 1.2.0      2024-01-08 [1] CRAN (R 4.3.0)
   PCAtools                            2.14.0     2023-10-24 [1] Bioconductor
   PerformanceAnalytics                2.0.4      2020-02-06 [1] CRAN (R 4.3.0)
   permute                             0.9-7      2022-01-27 [1] CRAN (R 4.3.0)
   pheatmap                            1.0.12     2019-01-04 [1] CRAN (R 4.3.0)
   pillar                              1.9.0      2023-03-22 [1] CRAN (R 4.3.0)
   pkgbuild                            1.4.4      2024-03-17 [1] CRAN (R 4.3.2)
   pkgconfig                           2.0.3      2019-09-22 [1] CRAN (R 4.3.0)
   pkgload                             1.4.0      2024-06-28 [1] CRAN (R 4.3.3)
   plotrix                             3.8-4      2023-11-10 [1] CRAN (R 4.3.0)
   plyr                                1.8.9      2023-10-02 [1] CRAN (R 4.3.0)
   plyranges                           1.22.0     2023-10-24 [1] Bioconductor
   png                                 0.1-8      2022-11-29 [1] CRAN (R 4.3.0)
   polyclip                            1.10-7     2024-07-23 [1] CRAN (R 4.3.3)
   preprocessCore                      1.64.0     2023-10-24 [1] Bioconductor
   prettyunits                         1.2.0      2023-09-24 [1] CRAN (R 4.3.0)
   profvis                             0.3.8      2023-05-02 [1] CRAN (R 4.3.0)
   progress                            1.2.3      2023-12-06 [1] CRAN (R 4.3.0)
   promises                            1.3.0      2024-04-05 [1] CRAN (R 4.3.2)
   ProtGenerics                        1.34.0     2023-10-24 [1] Bioconductor
   proxy                               0.4-27     2022-06-09 [1] CRAN (R 4.3.0)
   psychTools                          2.4.3      2024-03-19 [1] CRAN (R 4.3.2)
   purrr                               1.0.2      2023-08-10 [1] CRAN (R 4.3.0)
   quadprog                            1.5-8      2019-11-20 [1] CRAN (R 4.3.0)
   qvalue                              2.34.0     2023-10-24 [1] Bioconductor
   R.methodsS3                         1.8.2      2022-06-13 [1] CRAN (R 4.3.0)
   R.oo                                1.26.0     2024-01-24 [1] CRAN (R 4.3.2)
   R.utils                             2.12.3     2023-11-18 [1] CRAN (R 4.3.0)
   R2HTML                              2.3.4      2024-06-16 [1] CRAN (R 4.3.3)
   R6                                  2.5.1      2021-08-19 [1] CRAN (R 4.3.0)
   rappdirs                            0.3.3      2021-01-31 [1] CRAN (R 4.3.0)
   RBGL                                1.78.0     2023-10-24 [1] Bioconductor
   RColorBrewer                        1.1-3      2022-04-03 [1] CRAN (R 4.3.0)
   Rcpp                                1.0.13     2024-07-17 [1] CRAN (R 4.3.3)
   RCurl                               1.98-1.16  2024-07-11 [1] CRAN (R 4.3.3)
   readr                             * 2.1.5      2024-01-10 [1] CRAN (R 4.3.0)
   readxl                              1.4.3      2023-07-06 [1] CRAN (R 4.3.0)
   regioneR                            1.34.0     2023-10-24 [1] Bioconductor
   remotes                             2.5.0      2024-03-17 [1] CRAN (R 4.3.2)
   reshape                             0.8.9      2022-04-12 [1] CRAN (R 4.3.0)
   reshape2                            1.4.4      2020-04-09 [1] CRAN (R 4.3.0)
   restfulr                            0.0.15     2022-06-16 [1] CRAN (R 4.3.0)
   reticulate                          1.39.0     2024-09-05 [1] CRAN (R 4.3.3)
   rGREAT                              2.4.0      2023-10-24 [1] Bioconductor
   rhdf5                               2.46.1     2023-11-29 [1] Bioconductor
   rhdf5filters                        1.14.1     2023-11-06 [1] Bioconductor
   Rhdf5lib                            1.24.2     2024-02-07 [1] Bioconductor 3.18 (R 4.3.2)
   rjson                               0.2.21     2022-01-09 [1] CRAN (R 4.3.0)
   rlang                               1.1.4      2024-06-04 [1] CRAN (R 4.3.3)
   rmarkdown                           2.28       2024-08-17 [1] CRAN (R 4.3.3)
   rngtools                            1.5.2      2021-09-20 [1] CRAN (R 4.3.0)
   rpart                               4.1.23     2023-12-05 [1] CRAN (R 4.3.0)
   rprojroot                           2.0.4      2023-11-05 [1] CRAN (R 4.3.0)
   rrvgo                               1.14.2     2024-03-11 [1] Bioconductor 3.18 (R 4.3.3)
   Rsamtools                           2.18.0     2023-10-24 [1] Bioconductor
   RSpectra                            0.16-2     2024-07-18 [1] CRAN (R 4.3.3)
   RSQLite                             2.3.7      2024-05-27 [1] CRAN (R 4.3.3)
   rstatix                           * 0.7.2      2023-02-01 [1] CRAN (R 4.3.0)
   rstudioapi                          0.16.0     2024-03-24 [1] CRAN (R 4.3.2)
   rsvd                                1.0.5      2021-04-16 [1] CRAN (R 4.3.0)
   rtf                                 0.4-14.1   2020-03-22 [1] CRAN (R 4.3.0)
   rtracklayer                         1.62.0     2023-10-24 [1] Bioconductor
   RUnit                               0.4.33     2024-02-22 [1] CRAN (R 4.3.2)
   S4Arrays                            1.2.1      2024-03-06 [1] Bioconductor 3.18 (R 4.3.3)
   S4Vectors                         * 0.40.2     2023-11-23 [1] Bioconductor
   ScaledMatrix                        1.10.0     2023-10-24 [1] Bioconductor
   scales                              1.3.0      2023-11-28 [1] CRAN (R 4.3.0)
   scatterpie                          0.2.4      2024-08-28 [1] CRAN (R 4.3.3)
   scrime                              1.3.5      2018-12-01 [1] CRAN (R 4.3.0)
   seqinr                              4.2-36     2023-12-08 [1] CRAN (R 4.3.0)
   sessioninfo                         1.2.2      2021-12-06 [1] CRAN (R 4.3.0)
   shadowtext                          0.1.4      2024-07-18 [1] CRAN (R 4.3.3)
   shape                               1.4.6.1    2024-02-23 [1] CRAN (R 4.3.2)
   shiny                               1.9.1      2024-08-01 [1] CRAN (R 4.3.3)
   sigFeature                          1.20.0     2023-10-24 [1] Bioconductor
   siggenes                            1.76.0     2023-10-24 [1] Bioconductor
   slam                                0.1-53     2024-09-02 [1] CRAN (R 4.3.3)
   sm                                * 2.2-6.0    2024-02-17 [1] CRAN (R 4.3.2)
   SparseArray                         1.2.4      2024-02-11 [1] Bioconductor 3.18 (R 4.3.2)
   SparseM                             1.84-2     2024-07-17 [1] CRAN (R 4.3.3)
   sparseMatrixStats                   1.14.0     2023-10-24 [1] Bioconductor
   statmod                             1.5.0      2023-01-06 [1] CRAN (R 4.3.0)
   stringi                             1.8.4      2024-05-06 [1] CRAN (R 4.3.2)
   stringr                             1.5.1      2023-11-14 [1] CRAN (R 4.3.0)
   SummarizedExperiment              * 1.32.0     2023-10-24 [1] Bioconductor
   survival                            3.7-0      2024-06-05 [1] CRAN (R 4.3.3)
   tibble                              3.2.1      2023-03-20 [1] CRAN (R 4.3.0)
   tidygraph                           1.3.1      2024-01-30 [1] CRAN (R 4.3.2)
   tidyr                             * 1.3.1      2024-01-24 [1] CRAN (R 4.3.2)
   tidyselect                          1.2.1      2024-03-11 [1] CRAN (R 4.3.2)
   tidytree                            0.4.6      2023-12-12 [1] CRAN (R 4.3.0)
   tm                                  0.7-14     2024-08-13 [1] CRAN (R 4.3.3)
   treeio                              1.26.0     2023-10-24 [1] Bioconductor
   treemap                             2.4-4      2023-05-25 [1] CRAN (R 4.3.0)
   tweenr                              2.0.3      2024-02-26 [1] CRAN (R 4.3.2)
   TxDb.Hsapiens.UCSC.hg19.knownGene   3.2.2      2024-09-10 [1] Bioconductor
   TxDb.Hsapiens.UCSC.hg38.knownGene   3.18.0     2024-09-10 [1] Bioconductor
   tzdb                                0.4.0      2023-05-12 [1] CRAN (R 4.3.0)
   umap                                0.2.10.0   2023-02-01 [1] CRAN (R 4.3.0)
   urlchecker                          1.0.1      2021-11-30 [1] CRAN (R 4.3.0)
   usethis                             3.0.0      2024-07-29 [1] CRAN (R 4.3.3)
   utf8                                1.2.4      2023-10-22 [1] CRAN (R 4.3.0)
   vctrs                               0.6.5      2023-12-01 [1] CRAN (R 4.3.0)
   vioplot                           * 0.5.0      2024-07-05 [1] CRAN (R 4.3.3)
   viridis                             0.6.5      2024-01-29 [1] CRAN (R 4.3.2)
   viridisLite                         0.4.2      2023-05-02 [1] CRAN (R 4.3.0)
   vroom                               1.6.5      2023-12-05 [1] CRAN (R 4.3.0)
   wesanderson                         0.3.7      2023-10-31 [1] CRAN (R 4.3.0)
   withr                               3.0.1      2024-07-31 [1] CRAN (R 4.3.3)
   wordcloud                           2.6        2018-08-24 [1] CRAN (R 4.3.0)
   WriteXLS                            6.7.0      2024-07-20 [1] CRAN (R 4.3.3)
   xfun                                0.47       2024-08-17 [1] CRAN (R 4.3.3)
   XICOR                               0.4.1      2023-04-21 [1] CRAN (R 4.3.0)
   XML                                 3.99-0.17  2024-06-25 [1] CRAN (R 4.3.3)
   xml2                                1.3.6      2023-12-04 [1] CRAN (R 4.3.0)
   xtable                              1.8-4      2019-04-21 [1] CRAN (R 4.3.0)
   xts                                 0.14.0     2024-06-05 [1] CRAN (R 4.3.3)
   XVector                             0.42.0     2023-10-24 [1] Bioconductor
   yaml                                2.3.10     2024-07-26 [1] CRAN (R 4.3.3)
   yulab.utils                         0.1.7      2024-08-26 [1] CRAN (R 4.3.3)
   zip                                 2.3.1      2024-01-27 [1] CRAN (R 4.3.2)
   zlibbioc                            1.48.2     2024-03-13 [1] Bioconductor 3.18 (R 4.3.3)
   zoo                               * 1.8-12     2023-04-13 [1] CRAN (R 4.3.0)

 [1] /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library

 P ── Loaded and on-disk path mismatch.

──────────────────────────────────────────────────────────────────────────────

8.1 Quarto

This document is generated using Quarto which enables to weave together content and executable code into a finished document. This is an improved version of what used to be called ‘Rmarkdown’. The document hides all code chunks, but they can be opened up with the drop down arrow. On the top right corner there’s a toggle for dark-mode. To learn more about Quarto see here.

References

Cui, Haissi, Jolene K. Diedrich, Douglas C. Wu, Justin J. Lim, Ryan M. Nottingham, James J. Moresco, John R. Yates, Benjamin J. Blencowe, Alan M. Lambowitz, and Paul Schimmel. 2023. Arg-tRNA synthetase links inflammatory metabolism to RNA splicing and nuclear trafficking via SRRM2.” Nature Cell Biology 25 (4): 592–603. https://doi.org/10.1038/s41556-023-01118-8.
Zhang, Mengjun, Zhuang Gu, Shuanghui Guo, Yingtian Sun, Suibin Ma, Shuo Yang, Jierui Guo, et al. 2024. SRRM2 phase separation drives assembly of nuclear speckle subcompartments. Cell Reports 43 (3): 113827. https://doi.org/10.1016/j.celrep.2024.113827.